rm(list=ls())
gc()
library(tidyverse)
library(qgraph)
library(pander)
library(summarytools)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(tidymodels)
library(knitr)
library(extrafont)
## for poisson class of elastic net
library(poissonreg)
library("sva")
### plotting libraries
library(ggtext)
library(ggpubr)
library(cowplot)
library(ggthemes)
### package for pls analysis (all packages are necessary for the model to run)
library("pls")
library("mixOmics")
library(plsmod)
### scatter plot packages
library(viridis)
library(ggpointdensity)
mixOmics was forcely installed in the environment. Some the select function from dplyr may not work. This is also true for map functions. So dplyr::select and purrr::map are suggested
Using ABCD 4.0
source(paste0(scriptfold,"stacking_gfactor_modelling/r_functions.R"))
set up parallel
# parallel for ubuntu
doParallel::registerDoParallel(cores=15)
## this one works for ubuntu but slow
#library(doFuture)
#registerDoFuture()
#plan(multicore(workers = 30))
### parallel for windows
#library(doFuture)
#registerDoFuture()
#plan(multisession(workers = 30))
ACS <-read_csv(paste0(dataFold,"ACSPSW03_DATA_TABLE.csv"))
## Rows: 23101 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): SUBJECTKEY, SRC_SUBJECT_ID, INTERVIEW_DATE, SEX, EVENTNAME, GENETI...
## dbl (18): ACSPSW03_ID, DATASET_ID, INTERVIEW_AGE, RACE_ETHNICITY, REL_FAMILY...
## lgl (6): GENETIC_PAIRED_SUBJECTID_4, GENETIC_PI_HAT_3, GENETIC_PI_HAT_4, GE...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#knitr::kable(glimpse(ACS))
#race_ethnicity
#1 = White; 2 = Black; 3 = Hispanic; 4 = Asian; 5 = Other
# guardian-report relationship
# Relationship of the participant in his or her family
# 0 = single; 1 = sibling; 2 = twin; 3 = triplet
# ACS %>% count(REL_RELATIONSHIP)
ACSselected <- ACS %>%
dplyr::select(all_of(c("SUBJECTKEY", "EVENTNAME", "SEX", "INTERVIEW_AGE", "RACE_ETHNICITY",
"REL_FAMILY_ID", "ACS_RAKED_PROPENSITY_SCORE"))) %>%
mutate(RACE_ETHNICITY = recode_factor(as.factor(RACE_ETHNICITY),
`1` = "White", `2` = "Black", `3` = "Hispanic", `4` = "Asian", `5` = "Other",
.default = "White")) %>%
mutate(SEX = as.factor(SEX)) %>%
mutate(REL_FAMILY_ID = as.factor(REL_FAMILY_ID))
ACSselected %>%
filter(EVENTNAME =="baseline_year_1_arm_1") %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 7 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| factor | 3 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 11876 | 0 |
| EVENTNAME | 0 | 1 | 21 | 21 | 0 | 1 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| SEX | 0 | 1 | FALSE | 2 | M: 6196, F: 5680 |
| RACE_ETHNICITY | 2 | 1 | FALSE | 5 | Whi: 6180, His: 2411, Bla: 1784, Oth: 1247 |
| REL_FAMILY_ID | 0 | 1 | FALSE | 9854 | 373: 5, 749: 4, 11: 3, 400: 3 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| INTERVIEW_AGE | 0 | 1 | 118.98 | 7.50 | 107.00 | 112.00 | 119.00 | 126.00 | 133.00 | ▇▆▆▆▆ |
| ACS_RAKED_PROPENSITY_SCORE | 0 | 1 | 691.34 | 350.98 | 161.36 | 449.35 | 619.31 | 821.74 | 1778.92 | ▅▇▂▂▁ |
###loading site and scanner information
Siteinfo <-tibble::as_tibble(read.csv(paste0(dataFold, "ABCD_LT01_DATA_TABLE.csv")))
Change the wrong site manually based on the: “Release Notes: Adolescent Brain Cognitive Development Study ℠ (ABCD Study ® ) Data Release 4.0 Changes and Known Issues”
only fixed the baseline and two year followup that is used in the analysis
As ID is required not to show in the coding shared online, please refer to this file for the exact subject IDs.
Siteinfo_fixed <- Siteinfo
site_fix <- readRDS(paste0(scriptfold,'Common_psy_gene_brain_all/saved_outputs/site_fix', '.RData'))
for(i in 1:dim(site_fix)[1]){
fix_site_id <- site_fix$SUBJECTKEY[i]
fix_site_event <- site_fix$EVENTNAME[i]
fix_site <- site_fix$SITE_ID_L[i]
Siteinfo_fixed$SITE_ID_L[which(Siteinfo_fixed$SUBJECTKEY== fix_site_id& Siteinfo_fixed$EVENTNAME == fix_site_event)] <- fix_site
}
Siteinfo <-Siteinfo_fixed
vision_idx <- as_tibble(read.csv(paste0(dataFold,"ABCD_SVS01_DATA_TABLE.CSV"))) %>%
mutate(visionProb = ifelse(SNELLEN_VA_Y == 0 | SNELLEN_VA_Y == 1 | VIS_FLG == 2, 1, 0))
#vision_idx %>% select(SNELLEN_VA_Y, VIS_FLG, visionProb) %>% arrange(SNELLEN_VA_Y)
plotting_names <- readxl::read_excel(paste0(scriptfold,"Common_psy_gene_brain_all/PsychopathologyPlottingNames_NP.xlsx")) %>%dplyr::select(-"dictionary_name")
plotting_names <- plotting_names %>%
rename(feature_names=variable_name)
Parent Adult Self Report Raw Scores
Adaptive Functioning and Strengths Scales: Education; Friends; Spouse/Partner; Family; Job; Personal Strengths
Syndrome Scales: Anxious/Depressed; Withdrawn; Somatic Complaints; Thought Problems; Attention Problems; Aggressive Behavior; Rule-breaking Behavior; and Intrusive
DSM-5-oriented Scales: Depressive Problems; Anxiety Problems; Somatic Problems; Avoidant Personality Problems; Attention Deficit/Hyperactivity Problems (Inattention and Hyperactivity/Impulsivity subscales); and Antisocial Personality Problems
Substance Use Scales: Tobacco; Alcohol; Drugs
Internalizing (INT), Attention Problems (ATT), Externalizing (EXT), and Total Problems (TOT) scales.
ASRPrecomputed <-read_csv(paste0(dataFold,"ABCD_ASRS01_DATA_TABLE.csv"))
#glimpse(ASRPrecomputed)
ASRPrecomputed_names<- ASRPrecomputed %>% dplyr::select(ends_with("_R") )%>% colnames()
ASRPrecomputedSelected <- ASRPrecomputed %>% dplyr::select(all_of(c("SUBJECTKEY",
"EVENTNAME", ASRPrecomputed_names)))
ASRPrecomputedSelected %>%
filter(EVENTNAME =="baseline_year_1_arm_1") %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 22 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 20 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 11876 | 0 |
| EVENTNAME | 0 | 1 | 21 | 21 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| ASR_SCR_PERSTR_R | 5 | 1 | 16.52 | 3.52 | 0 | 15 | 17 | 19 | 22 | ▁▁▂▇▇ |
| ASR_SCR_ANXDEP_R | 5 | 1 | 5.03 | 4.93 | 0 | 1 | 4 | 7 | 35 | ▇▂▁▁▁ |
| ASR_SCR_WITHDRAWN_R | 5 | 1 | 1.56 | 2.11 | 0 | 0 | 1 | 2 | 17 | ▇▁▁▁▁ |
| ASR_SCR_SOMATIC_R | 5 | 1 | 2.94 | 3.18 | 0 | 1 | 2 | 4 | 23 | ▇▂▁▁▁ |
| ASR_SCR_THOUGHT_R | 5 | 1 | 1.43 | 1.85 | 0 | 0 | 1 | 2 | 18 | ▇▁▁▁▁ |
| ASR_SCR_ATTENTION_R | 5 | 1 | 4.63 | 4.26 | 0 | 1 | 4 | 7 | 27 | ▇▃▁▁▁ |
| ASR_SCR_AGGRESSIVE_R | 5 | 1 | 3.35 | 3.57 | 0 | 1 | 2 | 5 | 25 | ▇▂▁▁▁ |
| ASR_SCR_RULEBREAK_R | 5 | 1 | 1.19 | 1.85 | 0 | 0 | 0 | 2 | 18 | ▇▁▁▁▁ |
| ASR_SCR_INTRUSIVE_R | 5 | 1 | 1.00 | 1.43 | 0 | 0 | 0 | 2 | 10 | ▇▁▁▁▁ |
| ASR_SCR_INTERNAL_R | 5 | 1 | 9.53 | 8.74 | 0 | 3 | 7 | 13 | 70 | ▇▂▁▁▁ |
| ASR_SCR_EXTERNAL_R | 5 | 1 | 5.54 | 5.62 | 0 | 1 | 4 | 8 | 39 | ▇▂▁▁▁ |
| ASR_SCR_TOTPROB_R | 4 | 1 | 21.14 | 17.96 | 0 | 8 | 16 | 29 | 154 | ▇▂▁▁▁ |
| ASR_SCR_DEPRESS_R | 5 | 1 | 3.96 | 3.66 | 0 | 1 | 3 | 6 | 28 | ▇▂▁▁▁ |
| ASR_SCR_ANXDISORD_R | 5 | 1 | 3.75 | 2.56 | 0 | 2 | 3 | 5 | 12 | ▇▆▆▂▁ |
| ASR_SCR_SOMATICPR_R | 5 | 1 | 1.91 | 2.37 | 0 | 0 | 1 | 3 | 18 | ▇▂▁▁▁ |
| ASR_SCR_AVOIDANT_R | 5 | 1 | 2.01 | 2.15 | 0 | 0 | 1 | 3 | 14 | ▇▃▁▁▁ |
| ASR_SCR_ADHD_R | 5 | 1 | 3.80 | 3.64 | 0 | 1 | 3 | 6 | 25 | ▇▂▁▁▁ |
| ASR_SCR_ANTISOCIAL_R | 5 | 1 | 2.10 | 2.49 | 0 | 0 | 1 | 3 | 22 | ▇▁▁▁▁ |
| ASR_SCR_INATTENTION_R | 5 | 1 | 2.35 | 2.35 | 0 | 0 | 2 | 4 | 14 | ▇▃▁▁▁ |
| ASR_SCR_HYPERACTIVE_R | 5 | 1 | 1.45 | 1.73 | 0 | 0 | 1 | 2 | 12 | ▇▂▁▁▁ |
ASRPrecomputedSelected %>%
filter(EVENTNAME =="2_year_follow_up_y_arm_1") %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 22 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 20 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 10414 | 0 |
| EVENTNAME | 0 | 1 | 24 | 24 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| ASR_SCR_PERSTR_R | 57 | 0.99 | 16.10 | 3.70 | 0 | 14 | 17 | 19 | 22 | ▁▁▃▇▇ |
| ASR_SCR_ANXDEP_R | 57 | 0.99 | 5.04 | 4.96 | 0 | 1 | 4 | 7 | 34 | ▇▂▁▁▁ |
| ASR_SCR_WITHDRAWN_R | 57 | 0.99 | 1.57 | 2.07 | 0 | 0 | 1 | 2 | 18 | ▇▁▁▁▁ |
| ASR_SCR_SOMATIC_R | 57 | 0.99 | 2.95 | 3.17 | 0 | 1 | 2 | 4 | 22 | ▇▂▁▁▁ |
| ASR_SCR_THOUGHT_R | 57 | 0.99 | 1.28 | 1.71 | 0 | 0 | 1 | 2 | 17 | ▇▁▁▁▁ |
| ASR_SCR_ATTENTION_R | 57 | 0.99 | 4.59 | 4.22 | 0 | 1 | 3 | 7 | 29 | ▇▃▁▁▁ |
| ASR_SCR_AGGRESSIVE_R | 57 | 0.99 | 3.13 | 3.38 | 0 | 1 | 2 | 5 | 26 | ▇▂▁▁▁ |
| ASR_SCR_RULEBREAK_R | 57 | 0.99 | 1.06 | 1.73 | 0 | 0 | 0 | 2 | 21 | ▇▁▁▁▁ |
| ASR_SCR_INTRUSIVE_R | 57 | 0.99 | 0.95 | 1.39 | 0 | 0 | 0 | 1 | 11 | ▇▁▁▁▁ |
| ASR_SCR_INTERNAL_R | 57 | 0.99 | 9.55 | 8.73 | 0 | 3 | 7 | 13 | 65 | ▇▂▁▁▁ |
| ASR_SCR_EXTERNAL_R | 57 | 0.99 | 5.15 | 5.30 | 0 | 1 | 4 | 7 | 57 | ▇▁▁▁▁ |
| ASR_SCR_TOTPROB_R | 57 | 0.99 | 20.57 | 17.48 | 0 | 8 | 16 | 28 | 133 | ▇▂▁▁▁ |
| ASR_SCR_DEPRESS_R | 57 | 0.99 | 4.00 | 3.67 | 0 | 1 | 3 | 6 | 25 | ▇▂▁▁▁ |
| ASR_SCR_ANXDISORD_R | 57 | 0.99 | 3.64 | 2.53 | 0 | 2 | 3 | 5 | 12 | ▇▆▆▁▁ |
| ASR_SCR_SOMATICPR_R | 57 | 0.99 | 1.92 | 2.36 | 0 | 0 | 1 | 3 | 17 | ▇▂▁▁▁ |
| ASR_SCR_AVOIDANT_R | 57 | 0.99 | 1.98 | 2.11 | 0 | 0 | 1 | 3 | 13 | ▇▃▁▁▁ |
| ASR_SCR_ADHD_R | 57 | 0.99 | 3.68 | 3.54 | 0 | 1 | 3 | 5 | 23 | ▇▃▁▁▁ |
| ASR_SCR_ANTISOCIAL_R | 57 | 0.99 | 2.00 | 2.35 | 0 | 0 | 1 | 3 | 30 | ▇▁▁▁▁ |
| ASR_SCR_INATTENTION_R | 57 | 0.99 | 2.35 | 2.33 | 0 | 0 | 2 | 4 | 14 | ▇▃▁▁▁ |
| ASR_SCR_HYPERACTIVE_R | 57 | 0.99 | 1.33 | 1.66 | 0 | 0 | 1 | 2 | 12 | ▇▁▁▁▁ |
ASRPrecomputedSelected %>%
filter(EVENTNAME =="3_year_follow_up_y_arm_1") %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 6251 |
| Number of columns | 22 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 20 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 6251 | 0 |
| EVENTNAME | 0 | 1 | 24 | 24 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| ASR_SCR_PERSTR_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_ANXDEP_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_WITHDRAWN_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_SOMATIC_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_THOUGHT_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_ATTENTION_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_AGGRESSIVE_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_RULEBREAK_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_INTRUSIVE_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_INTERNAL_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_EXTERNAL_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_TOTPROB_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_DEPRESS_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_ANXDISORD_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_SOMATICPR_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_AVOIDANT_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_ADHD_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_ANTISOCIAL_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_INATTENTION_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_HYPERACTIVE_R | 6251 | 0 | NaN | NA | NA | NA | NA | NA | NA |
BISBAS <-as_tibble(read.csv(paste0(dataFold,"ABCD_BISBAS01_DATA_TABLE.csv"))) %>%
# filter(EVENTNAME =="baseline_year_1_arm_1") %>%
mutate(BISAvg=rowMeans(cbind(BISBAS2_Y,BISBAS3_Y,BISBAS4_Y,BISBAS6_Y), na.rm=F)) %>%
mutate(BASRRAvg=rowMeans(cbind(BISBAS8_Y,BISBAS10_Y,BISBAS11_Y,BISBAS12_Y), na.rm=F)) %>%
mutate(BASDriveAvg=rowMeans(cbind(BISBAS13_Y,BISBAS14_Y,BISBAS15_Y,BISBAS16_Y), na.rm=F)) %>%
mutate(BASFunAvg=rowMeans(cbind(BISBAS17_Y,BISBAS18_Y,BISBAS19_Y,BISBAS20_Y), na.rm=F)) %>%
mutate(BASAllAvg=rowMeans(cbind(BASRRAvg,BASDriveAvg,BASFunAvg), na.rm=F)) %>%
dplyr::select(all_of(c('SUBJECTKEY','EVENTNAME','BISAvg','BASRRAvg','BASDriveAvg','BASFunAvg','BASAllAvg')))
BISBAS %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>%
ggplot(aes(x=BISAvg), bins = 100) +
geom_histogram(color="black", fill="white") +
xlab("averaged BIS") +
theme_classic(base_size = 30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 22 rows containing non-finite values (`stat_bin()`).
BISBAS %>% filter(EVENTNAME =="2_year_follow_up_y_arm_1" ) %>%
ggplot(aes(x=BISAvg), bins = 100) +
geom_histogram(color="black", fill="white") +
xlab("averaged BIS") +
theme_classic(base_size = 30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 30 rows containing non-finite values (`stat_bin()`).
BISBAS %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>%
ggplot(aes(x=BASAllAvg)) +
geom_histogram(color="black", fill="white", bins = 100) +
xlab("averaged BAS") +
theme_classic(base_size = 30)
## Warning: Removed 23 rows containing non-finite values (`stat_bin()`).
BISBAS %>% filter(EVENTNAME =="2_year_follow_up_y_arm_1") %>%
ggplot(aes(x=BASAllAvg)) +
geom_histogram(color="black", fill="white", bins = 100) +
xlab("averaged BAS") +
theme_classic(base_size = 30)
## Warning: Removed 30 rows containing non-finite values (`stat_bin()`).
look like only ABCD Prodromal Psychosis Scale (PPS) and UPSS (beside
BIS/BAS) are avaliable for the baseline.
Though 4595 data from PPS is missing. So we will not use PPS and only
focus on UPPS.
#ABCD Sum Scores Mental Health Youth
mentalHealthYouth <-as_tibble(read.csv(paste0(dataFold,"ABCD_MHY02_DATA_TABLE.csv")))
mentalHealthYouth %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>% dplyr::select(-1:-8) %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 100 |
| _______________________ | |
| Column type frequency: | |
| logical | 13 |
| numeric | 87 |
| ________________________ | |
| Group variables | None |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| PLE_Y_SS_TOTAL_GOOD_NT | 11876 | 0 | NaN | : |
| PLE_Y_SS_TOTAL_BAD_NT | 11876 | 0 | NaN | : |
| PLE_Y_SS_AFFECT_SUM_NT | 11876 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_MEAN_NT | 11876 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_GOOD_SUM_NT | 11876 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_GOOD_MEAN_NT | 11876 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_BAD_SUM_NT | 11876 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_BAD_MEAN_NT | 11876 | 0 | NaN | : |
| DELQ_Y_SS_SUM | 11876 | 0 | NaN | : |
| DELQ_Y_SS_SUM_NM | 11876 | 0 | NaN | : |
| DELQ_Y_SS_SUM_NT | 11876 | 0 | NaN | : |
| ERQ_SS_REAPPRAISAL_PR | 11876 | 0 | NaN | : |
| ERQ_SS_SUPPRESS_PR | 11876 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| PLE_Y_SS_TOTAL_NUMBER | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PLE_Y_SS_TOTAL_NUMBER_NM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PLE_Y_SS_TOTAL_NUMBER_NT | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PLE_Y_SS_TOTAL_GOOD | 0 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| PLE_Y_SS_TOTAL_BAD | 0 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| PLE_Y_SS_AFFECT_SUM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PLE_Y_SS_AFFECTED_GOOD_SUM | 0 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| PLE_Y_SS_AFFECTED_GOOD_MEAN | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PLE_Y_SS_AFFECTED_BAD_SUM | 0 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| PLE_Y_SS_AFFECTED_BAD_MEAN | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PPS_Y_SS_NUMBER | 11 | 1.00 | 2.63 | 3.56 | 0 | 0 | 1 | 4 | 21 | ▇▂▁▁▁ |
| PPS_Y_SS_NUMBER_NM | 0 | 1.00 | 0.02 | 0.69 | 0 | 0 | 0 | 0 | 21 | ▇▁▁▁▁ |
| PPS_Y_SS_NUMBER_NT | 0 | 1.00 | 21.00 | 0.00 | 21 | 21 | 21 | 21 | 21 | ▁▁▇▁▁ |
| PPS_Y_SS_BOTHER_SUM | 4594 | 0.61 | 2.14 | 2.62 | 0 | 0 | 1 | 3 | 19 | ▇▂▁▁▁ |
| PPS_Y_SS_BOTHER_SUM_NM | 0 | 1.00 | 18.37 | 3.56 | 0 | 17 | 20 | 21 | 21 | ▁▁▁▂▇ |
| PPS_Y_SS_BOTHER_SUM_NT | 0 | 1.00 | 21.00 | 0.00 | 21 | 21 | 21 | 21 | 21 | ▁▁▇▁▁ |
| PPS_Y_SS_BOTHER_N_1 | 4594 | 0.61 | 2.15 | 2.07 | 0 | 1 | 2 | 3 | 17 | ▇▂▁▁▁ |
| PPS_Y_SS_BOTHER_N_1_NM | 0 | 1.00 | 18.37 | 3.56 | 0 | 17 | 20 | 21 | 21 | ▁▁▁▂▇ |
| PPS_Y_SS_BOTHER_N_1_NT | 0 | 1.00 | 21.00 | 0.00 | 21 | 21 | 21 | 21 | 21 | ▁▁▇▁▁ |
| PPS_Y_SS_SEVERITY_SCORE | 10 | 1.00 | 6.32 | 10.61 | 0 | 0 | 2 | 8 | 104 | ▇▁▁▁▁ |
| PPS_Y_SS_SEVERITY_SCORE_NM | 0 | 1.00 | 38.05 | 5.67 | 3 | 36 | 40 | 42 | 42 | ▁▁▁▁▇ |
| PPS_Y_SS_SEVERITY_SCORE_NT | 0 | 1.00 | 42.00 | 0.00 | 42 | 42 | 42 | 42 | 42 | ▁▁▇▁▁ |
| PPS_SS_MEAN_SEVERITY | 4595 | 0.61 | 2.15 | 1.10 | 1 | 1 | 2 | 3 | 6 | ▇▃▂▁▁ |
| UPPS_Y_SS_NEGATIVE_URGENCY | 23 | 1.00 | 8.49 | 2.65 | 4 | 7 | 8 | 10 | 16 | ▆▇▇▂▁ |
| UPPS_Y_SS_NEGATIVE_URGENCY_NM | 0 | 1.00 | 0.01 | 0.17 | 0 | 0 | 0 | 0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_NEGATIVE_URGENCY_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| UPPS_Y_SS_LACK_OF_PLANNING | 23 | 1.00 | 7.74 | 2.38 | 4 | 6 | 8 | 9 | 16 | ▆▇▅▁▁ |
| UPPS_Y_SS_LACK_OF_PLANNING_NM | 0 | 1.00 | 0.01 | 0.18 | 0 | 0 | 0 | 0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_LACK_OF_PLANNING_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| UPPS_Y_SS_SENSATION_SEEKING | 23 | 1.00 | 9.77 | 2.68 | 4 | 8 | 10 | 12 | 16 | ▂▅▇▃▂ |
| UPPS_Y_SS_SENSATION_SEEKING_NM | 0 | 1.00 | 0.01 | 0.18 | 0 | 0 | 0 | 0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_SENSATION_SEEKING_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| UPPS_Y_SS_POSITIVE_URGENCY | 23 | 1.00 | 7.99 | 2.96 | 4 | 6 | 8 | 10 | 16 | ▇▆▆▂▁ |
| UPPS_Y_SS_POSITIVE_URGENCY_NM | 0 | 1.00 | 0.01 | 0.18 | 0 | 0 | 0 | 0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_POSITIVE_URGENCY_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| UPPS_Y_SS_LACK_OF_PERSEVERANCE | 23 | 1.00 | 7.04 | 2.25 | 4 | 5 | 7 | 8 | 16 | ▇▆▃▁▁ |
| UPPS_Y_SS_LACK_OF_PERS_NM | 0 | 1.00 | 0.01 | 0.18 | 0 | 0 | 0 | 0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_LACK_OF_PERS_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BIS_SUM | 22 | 1.00 | 9.51 | 3.75 | 0 | 7 | 9 | 12 | 21 | ▂▇▇▃▁ |
| BIS_Y_SS_BIS_SUM_NM | 22 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BIS_SUM_NT | 22 | 1.00 | 7.00 | 0.00 | 7 | 7 | 7 | 7 | 7 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_RR | 23 | 1.00 | 11.00 | 2.92 | 0 | 9 | 11 | 13 | 15 | ▁▂▅▇▇ |
| BIS_Y_SS_BAS_RR_NM | 23 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_RR_NT | 23 | 1.00 | 5.00 | 0.00 | 5 | 5 | 5 | 5 | 5 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_DRIVE | 23 | 1.00 | 4.14 | 3.06 | 0 | 2 | 4 | 6 | 12 | ▇▆▅▂▂ |
| BIS_Y_SS_BAS_DRIVE_NM | 23 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_DRIVE_NT | 23 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_FS | 23 | 1.00 | 5.71 | 2.64 | 0 | 4 | 6 | 7 | 12 | ▂▅▇▃▂ |
| BIS_Y_SS_BAS_FS_NM | 23 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_FS_NT | 23 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BISM_SUM | 22 | 1.00 | 5.53 | 2.84 | 0 | 3 | 5 | 8 | 12 | ▃▅▇▃▂ |
| BIS_Y_SS_BISM_SUM_NM | 22 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BISM_SUM_NT | 22 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BASM_RR | 23 | 1.00 | 8.82 | 2.39 | 0 | 7 | 9 | 11 | 12 | ▁▁▅▆▇ |
| BIS_Y_SS_BASM_RR_NM | 23 | 1.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BASM_RR_NT | 23 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BASM_DRIVE | 23 | 1.00 | 4.14 | 3.06 | 0 | 2 | 4 | 6 | 12 | ▇▆▅▂▂ |
| BIS_Y_SS_BASM_DRIVE_NM | 0 | 1.00 | 0.01 | 0.18 | 0 | 0 | 0 | 0 | 4 | ▇▁▁▁▁ |
| BIS_Y_SS_BASM_DRIVE_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| SUP_Y_SS_SUM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| SUP_Y_SS_SUM_NM | 1494 | 0.87 | 7.00 | 0.00 | 7 | 7 | 7 | 7 | 7 | ▁▁▇▁▁ |
| SUP_Y_SS_SUM_NT | 1494 | 0.87 | 7.00 | 0.00 | 7 | 7 | 7 | 7 | 7 | ▁▁▇▁▁ |
| GISH_Y_SS_M_SUM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| GISH_Y_SS_M_SUM_NM | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| GISH_Y_SS_M_SUM_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| GISH_Y_SS_F_SUM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| GISH_Y_SS_F_SUM_NM | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| GISH_Y_SS_F_SUM_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4 | 4 | 4 | 4 | ▁▁▇▁▁ |
| PEQ_SS_RELATIONAL_AGGS_NM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_RELATIONAL_AGGS_NT | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_RELATIONAL_VICTIM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_RELATIONAL_VICTIM_NM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_RELATIONAL_VICTIM_NT | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_REPUTATION_AGGS | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_REPUTATION_AGGS_NM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_REPUTATION_AGGS_NT | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_REPUTATION_VICTIM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_REPUTATION_VICTIM_NM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_REPUTATION_VICTIM_NT | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_OVERT_AGGRESSION | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_OVERT_AGGRESSION_NM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_OVERT_AGGRESSION_NT | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_OVERT_VICTIM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_OVERT_VICTIM_NM | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_OVERT_VICTIM_NT | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PEQ_SS_RELATIONAL_AGGS | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| PSTR_SS_PR | 11876 | 0.00 | NaN | NA | NA | NA | NA | NA | NA |
mentalHealthYouth %>% filter(EVENTNAME =="2_year_follow_up_y_arm_1") %>% dplyr::select(-1:-8) %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 100 |
| _______________________ | |
| Column type frequency: | |
| logical | 13 |
| numeric | 87 |
| ________________________ | |
| Group variables | None |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| PLE_Y_SS_TOTAL_GOOD_NT | 10414 | 0 | NaN | : |
| PLE_Y_SS_TOTAL_BAD_NT | 10414 | 0 | NaN | : |
| PLE_Y_SS_AFFECT_SUM_NT | 10414 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_MEAN_NT | 10414 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_GOOD_SUM_NT | 10414 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_GOOD_MEAN_NT | 10414 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_BAD_SUM_NT | 10414 | 0 | NaN | : |
| PLE_Y_SS_AFFECTED_BAD_MEAN_NT | 10414 | 0 | NaN | : |
| DELQ_Y_SS_SUM | 10414 | 0 | NaN | : |
| DELQ_Y_SS_SUM_NM | 10414 | 0 | NaN | : |
| DELQ_Y_SS_SUM_NT | 10414 | 0 | NaN | : |
| ERQ_SS_REAPPRAISAL_PR | 10414 | 0 | NaN | : |
| ERQ_SS_SUPPRESS_PR | 10414 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| PLE_Y_SS_TOTAL_NUMBER | 22 | 1.00 | 5.26 | 3.25 | 0 | 3.00 | 5.00 | 7.0 | 23 | ▇▇▂▁▁ |
| PLE_Y_SS_TOTAL_NUMBER_NM | 0 | 1.00 | 0.05 | 1.15 | 0 | 0.00 | 0.00 | 0.0 | 25 | ▇▁▁▁▁ |
| PLE_Y_SS_TOTAL_NUMBER_NT | 0 | 1.00 | 25.00 | 0.00 | 25 | 25.00 | 25.00 | 25.0 | 25 | ▁▁▇▁▁ |
| PLE_Y_SS_TOTAL_GOOD | 0 | 1.00 | 1.55 | 1.33 | 0 | 0.00 | 1.00 | 2.0 | 8 | ▇▆▁▁▁ |
| PLE_Y_SS_TOTAL_BAD | 0 | 1.00 | 2.39 | 2.25 | 0 | 1.00 | 2.00 | 3.0 | 19 | ▇▂▁▁▁ |
| PLE_Y_SS_AFFECT_SUM | 268 | 0.97 | 8.77 | 6.90 | 0 | 4.00 | 7.00 | 12.0 | 61 | ▇▂▁▁▁ |
| PLE_Y_SS_AFFECTED_GOOD_SUM | 0 | 1.00 | 2.43 | 2.85 | 0 | 0.00 | 2.00 | 4.0 | 18 | ▇▂▁▁▁ |
| PLE_Y_SS_AFFECTED_GOOD_MEAN | 2668 | 0.74 | 1.57 | 1.08 | 0 | 0.67 | 1.67 | 2.5 | 3 | ▇▅▃▅▇ |
| PLE_Y_SS_AFFECTED_BAD_SUM | 0 | 1.00 | 4.84 | 5.35 | 0 | 1.00 | 3.00 | 7.0 | 51 | ▇▁▁▁▁ |
| PLE_Y_SS_AFFECTED_BAD_MEAN | 1902 | 0.82 | 1.94 | 0.77 | 0 | 1.50 | 2.00 | 2.5 | 3 | ▁▅▃▇▇ |
| PPS_Y_SS_NUMBER | 26 | 1.00 | 1.56 | 2.79 | 0 | 0.00 | 0.00 | 2.0 | 21 | ▇▁▁▁▁ |
| PPS_Y_SS_NUMBER_NM | 0 | 1.00 | 0.05 | 1.05 | 0 | 0.00 | 0.00 | 0.0 | 21 | ▇▁▁▁▁ |
| PPS_Y_SS_NUMBER_NT | 0 | 1.00 | 21.00 | 0.00 | 21 | 21.00 | 21.00 | 21.0 | 21 | ▁▁▇▁▁ |
| PPS_Y_SS_BOTHER_SUM | 5929 | 0.43 | 1.68 | 2.19 | 0 | 0.00 | 1.00 | 2.0 | 17 | ▇▁▁▁▁ |
| PPS_Y_SS_BOTHER_SUM_NM | 0 | 1.00 | 19.44 | 2.79 | 0 | 19.00 | 21.00 | 21.0 | 21 | ▁▁▁▁▇ |
| PPS_Y_SS_BOTHER_SUM_NT | 0 | 1.00 | 21.00 | 0.00 | 21 | 21.00 | 21.00 | 21.0 | 21 | ▁▁▇▁▁ |
| PPS_Y_SS_BOTHER_N_1 | 5929 | 0.43 | 1.94 | 2.00 | 0 | 1.00 | 1.00 | 3.0 | 18 | ▇▂▁▁▁ |
| PPS_Y_SS_BOTHER_N_1_NM | 0 | 1.00 | 19.44 | 2.79 | 0 | 19.00 | 21.00 | 21.0 | 21 | ▁▁▁▁▇ |
| PPS_Y_SS_BOTHER_N_1_NT | 0 | 1.00 | 21.00 | 0.00 | 21 | 21.00 | 21.00 | 21.0 | 21 | ▁▁▇▁▁ |
| PPS_Y_SS_SEVERITY_SCORE | 26 | 1.00 | 3.56 | 7.72 | 0 | 0.00 | 0.00 | 4.0 | 89 | ▇▁▁▁▁ |
| PPS_Y_SS_SEVERITY_SCORE_NM | 0 | 1.00 | 39.72 | 4.29 | 5 | 39.00 | 42.00 | 42.0 | 42 | ▁▁▁▁▇ |
| PPS_Y_SS_SEVERITY_SCORE_NT | 0 | 1.00 | 42.00 | 0.00 | 42 | 42.00 | 42.00 | 42.0 | 42 | ▁▁▇▁▁ |
| PPS_SS_MEAN_SEVERITY | 5929 | 0.43 | 2.06 | 1.09 | 0 | 1.00 | 2.00 | 3.0 | 6 | ▇▇▆▂▁ |
| UPPS_Y_SS_NEGATIVE_URGENCY | 30 | 1.00 | 7.76 | 2.34 | 4 | 6.00 | 8.00 | 9.0 | 16 | ▇▇▇▁▁ |
| UPPS_Y_SS_NEGATIVE_URGENCY_NM | 0 | 1.00 | 0.01 | 0.21 | 0 | 0.00 | 0.00 | 0.0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_NEGATIVE_URGENCY_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| UPPS_Y_SS_LACK_OF_PLANNING | 30 | 1.00 | 7.77 | 2.24 | 4 | 6.00 | 8.00 | 9.0 | 16 | ▅▇▃▁▁ |
| UPPS_Y_SS_LACK_OF_PLANNING_NM | 0 | 1.00 | 0.01 | 0.21 | 0 | 0.00 | 0.00 | 0.0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_LACK_OF_PLANNING_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| UPPS_Y_SS_SENSATION_SEEKING | 29 | 1.00 | 9.48 | 2.68 | 3 | 8.00 | 9.00 | 11.0 | 16 | ▂▇▇▇▂ |
| UPPS_Y_SS_SENSATION_SEEKING_NM | 0 | 1.00 | 0.01 | 0.21 | 0 | 0.00 | 0.00 | 0.0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_SENSATION_SEEKING_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| UPPS_Y_SS_POSITIVE_URGENCY | 30 | 1.00 | 7.39 | 2.67 | 4 | 5.00 | 8.00 | 9.0 | 16 | ▇▆▅▁▁ |
| UPPS_Y_SS_POSITIVE_URGENCY_NM | 0 | 1.00 | 0.01 | 0.21 | 0 | 0.00 | 0.00 | 0.0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_POSITIVE_URGENCY_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| UPPS_Y_SS_LACK_OF_PERSEVERANCE | 30 | 1.00 | 6.97 | 2.25 | 4 | 5.00 | 7.00 | 8.0 | 16 | ▇▇▃▁▁ |
| UPPS_Y_SS_LACK_OF_PERS_NM | 0 | 1.00 | 0.01 | 0.21 | 0 | 0.00 | 0.00 | 0.0 | 4 | ▇▁▁▁▁ |
| UPPS_Y_SS_LACK_OF_PERS_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BIS_SUM | 30 | 1.00 | 8.70 | 3.94 | 0 | 6.00 | 8.00 | 11.0 | 21 | ▃▇▆▃▁ |
| BIS_Y_SS_BIS_SUM_NM | 30 | 1.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BIS_SUM_NT | 30 | 1.00 | 7.00 | 0.00 | 7 | 7.00 | 7.00 | 7.0 | 7 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_RR | 30 | 1.00 | 9.92 | 3.11 | 0 | 8.00 | 10.00 | 12.0 | 15 | ▁▃▇▇▆ |
| BIS_Y_SS_BAS_RR_NM | 30 | 1.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_RR_NT | 30 | 1.00 | 5.00 | 0.00 | 5 | 5.00 | 5.00 | 5.0 | 5 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_DRIVE | 30 | 1.00 | 3.68 | 2.79 | 0 | 2.00 | 3.00 | 5.0 | 12 | ▇▆▅▁▁ |
| BIS_Y_SS_BAS_DRIVE_NM | 30 | 1.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_DRIVE_NT | 30 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_FS | 30 | 1.00 | 4.55 | 2.59 | 0 | 3.00 | 4.00 | 6.0 | 12 | ▆▇▇▂▁ |
| BIS_Y_SS_BAS_FS_NM | 30 | 1.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BAS_FS_NT | 30 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BISM_SUM | 30 | 1.00 | 4.96 | 2.82 | 0 | 3.00 | 5.00 | 7.0 | 12 | ▅▆▇▃▂ |
| BIS_Y_SS_BISM_SUM_NM | 30 | 1.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BISM_SUM_NT | 30 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BASM_RR | 30 | 1.00 | 7.96 | 2.51 | 0 | 6.00 | 8.00 | 10.0 | 12 | ▁▂▇▇▇ |
| BIS_Y_SS_BASM_RR_NM | 30 | 1.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.0 | 0 | ▁▁▇▁▁ |
| BIS_Y_SS_BASM_RR_NT | 30 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| BIS_Y_SS_BASM_DRIVE | 30 | 1.00 | 3.68 | 2.79 | 0 | 2.00 | 3.00 | 5.0 | 12 | ▇▆▅▁▁ |
| BIS_Y_SS_BASM_DRIVE_NM | 0 | 1.00 | 0.01 | 0.21 | 0 | 0.00 | 0.00 | 0.0 | 4 | ▇▁▁▁▁ |
| BIS_Y_SS_BASM_DRIVE_NT | 0 | 1.00 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| SUP_Y_SS_SUM | 10414 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| SUP_Y_SS_SUM_NM | 10414 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| SUP_Y_SS_SUM_NT | 10414 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| GISH_Y_SS_M_SUM | 5066 | 0.51 | 19.63 | 1.06 | 4 | 20.00 | 20.00 | 20.0 | 20 | ▁▁▁▁▇ |
| GISH_Y_SS_M_SUM_NM | 2844 | 0.73 | 1.15 | 1.80 | 0 | 0.00 | 0.00 | 4.0 | 4 | ▇▁▁▁▃ |
| GISH_Y_SS_M_SUM_NT | 2844 | 0.73 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| GISH_Y_SS_F_SUM | 5551 | 0.47 | 18.67 | 2.31 | 4 | 18.00 | 20.00 | 20.0 | 20 | ▁▁▁▁▇ |
| GISH_Y_SS_F_SUM_NM | 3157 | 0.70 | 1.29 | 1.86 | 0 | 0.00 | 0.00 | 4.0 | 4 | ▇▁▁▁▃ |
| GISH_Y_SS_F_SUM_NT | 3157 | 0.70 | 4.00 | 0.00 | 4 | 4.00 | 4.00 | 4.0 | 4 | ▁▁▇▁▁ |
| PEQ_SS_RELATIONAL_AGGS_NM | 0 | 1.00 | 0.01 | 0.14 | 0 | 0.00 | 0.00 | 0.0 | 3 | ▇▁▁▁▁ |
| PEQ_SS_RELATIONAL_AGGS_NT | 0 | 1.00 | 3.00 | 0.00 | 3 | 3.00 | 3.00 | 3.0 | 3 | ▁▁▇▁▁ |
| PEQ_SS_RELATIONAL_VICTIM | 22 | 1.00 | 4.74 | 1.93 | 3 | 3.00 | 4.00 | 6.0 | 15 | ▇▂▁▁▁ |
| PEQ_SS_RELATIONAL_VICTIM_NM | 0 | 1.00 | 0.01 | 0.14 | 0 | 0.00 | 0.00 | 0.0 | 3 | ▇▁▁▁▁ |
| PEQ_SS_RELATIONAL_VICTIM_NT | 0 | 1.00 | 3.00 | 0.00 | 3 | 3.00 | 3.00 | 3.0 | 3 | ▁▁▇▁▁ |
| PEQ_SS_REPUTATION_AGGS | 22 | 1.00 | 3.17 | 0.67 | 3 | 3.00 | 3.00 | 3.0 | 15 | ▇▁▁▁▁ |
| PEQ_SS_REPUTATION_AGGS_NM | 0 | 1.00 | 0.01 | 0.14 | 0 | 0.00 | 0.00 | 0.0 | 3 | ▇▁▁▁▁ |
| PEQ_SS_REPUTATION_AGGS_NT | 0 | 1.00 | 3.00 | 0.00 | 3 | 3.00 | 3.00 | 3.0 | 3 | ▁▁▇▁▁ |
| PEQ_SS_REPUTATION_VICTIM | 22 | 1.00 | 3.97 | 1.80 | 3 | 3.00 | 3.00 | 4.0 | 15 | ▇▁▁▁▁ |
| PEQ_SS_REPUTATION_VICTIM_NM | 0 | 1.00 | 0.01 | 0.14 | 0 | 0.00 | 0.00 | 0.0 | 3 | ▇▁▁▁▁ |
| PEQ_SS_REPUTATION_VICTIM_NT | 0 | 1.00 | 3.00 | 0.00 | 3 | 3.00 | 3.00 | 3.0 | 3 | ▁▁▇▁▁ |
| PEQ_SS_OVERT_AGGRESSION | 22 | 1.00 | 3.31 | 0.93 | 3 | 3.00 | 3.00 | 3.0 | 15 | ▇▁▁▁▁ |
| PEQ_SS_OVERT_AGGRESSION_NM | 0 | 1.00 | 0.01 | 0.14 | 0 | 0.00 | 0.00 | 0.0 | 3 | ▇▁▁▁▁ |
| PEQ_SS_OVERT_AGGRESSION_NT | 0 | 1.00 | 3.00 | 0.00 | 3 | 3.00 | 3.00 | 3.0 | 3 | ▁▁▇▁▁ |
| PEQ_SS_OVERT_VICTIM | 22 | 1.00 | 3.67 | 1.38 | 3 | 3.00 | 3.00 | 4.0 | 15 | ▇▁▁▁▁ |
| PEQ_SS_OVERT_VICTIM_NM | 0 | 1.00 | 0.01 | 0.14 | 0 | 0.00 | 0.00 | 0.0 | 3 | ▇▁▁▁▁ |
| PEQ_SS_OVERT_VICTIM_NT | 0 | 1.00 | 3.00 | 0.00 | 3 | 3.00 | 3.00 | 3.0 | 3 | ▁▁▇▁▁ |
| PEQ_SS_RELATIONAL_AGGS | 22 | 1.00 | 3.87 | 1.27 | 3 | 3.00 | 3.00 | 5.0 | 14 | ▇▁▁▁▁ |
| PSTR_SS_PR | 10414 | 0.00 | NaN | NA | NA | NA | NA | NA | NA |
UPPS <- mentalHealthYouth %>% dplyr::select(SUBJECTKEY,EVENTNAME,
UPPS_Y_SS_NEGATIVE_URGENCY, UPPS_Y_SS_LACK_OF_PLANNING,
UPPS_Y_SS_SENSATION_SEEKING, UPPS_Y_SS_POSITIVE_URGENCY, UPPS_Y_SS_LACK_OF_PERSEVERANCE)
#ABCD Sum Scores Mental Health Youth
CBCL_sum_scores <-as_tibble(read.csv(paste0(dataFold,"ABCD_CBCLS01_DATA_TABLE.csv")))
CBCL_sum_scores %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>% dplyr::select(-1:-8) %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 81 |
| _______________________ | |
| Column type frequency: | |
| character | 20 |
| logical | 1 |
| numeric | 60 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| CBCL_SCR_SYN_ANXDEP_M | 0 | 1 | 0 | 143 | 11865 | 4 | 0 |
| CBCL_SCR_SYN_WITHDEP_M | 0 | 1 | 0 | 90 | 11865 | 4 | 0 |
| CBCL_SCR_SYN_SOMATIC_M | 0 | 1 | 0 | 127 | 11865 | 3 | 0 |
| CBCL_SCR_SYN_SOCIAL_M | 0 | 1 | 0 | 120 | 11865 | 3 | 0 |
| CBCL_SCR_SYN_THOUGHT_M | 0 | 1 | 0 | 165 | 11865 | 4 | 0 |
| CBCL_SCR_SYN_ATTENTION_M | 0 | 1 | 0 | 109 | 11865 | 5 | 0 |
| CBCL_SCR_SYN_RULEBREAK_M | 0 | 1 | 0 | 189 | 11865 | 5 | 0 |
| CBCL_SCR_SYN_AGGRESSIVE_M | 0 | 1 | 0 | 198 | 11864 | 5 | 0 |
| CBCL_SCR_SYN_INTERNAL_M | 0 | 1 | 0 | 362 | 11865 | 5 | 0 |
| CBCL_SCR_SYN_EXTERNAL_M | 0 | 1 | 0 | 388 | 11864 | 6 | 0 |
| CBCL_SCR_SYN_TOTPROB_M | 0 | 1 | 0 | 1329 | 11864 | 7 | 0 |
| CBCL_SCR_DSM5_DEPRESS_M | 0 | 1 | 0 | 145 | 11865 | 4 | 0 |
| CBCL_SCR_DSM5_ANXDISORD_M | 0 | 1 | 0 | 99 | 11865 | 4 | 0 |
| CBCL_SCR_DSM5_SOMATICPR_M | 0 | 1 | 0 | 83 | 11865 | 3 | 0 |
| CBCL_SCR_DSM5_ADHD_M | 0 | 1 | 0 | 77 | 11865 | 5 | 0 |
| CBCL_SCR_DSM5_OPPOSIT_M | 0 | 1 | 0 | 54 | 11865 | 4 | 0 |
| CBCL_SCR_DSM5_CONDUCT_M | 0 | 1 | 0 | 188 | 11864 | 5 | 0 |
| CBCL_SCR_07_SCT_M | 0 | 1 | 0 | 44 | 11865 | 3 | 0 |
| CBCL_SCR_07_OCD_M | 0 | 1 | 0 | 88 | 11865 | 3 | 0 |
| CBCL_SCR_07_STRESS_M | 0 | 1 | 0 | 155 | 11865 | 4 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| CBCL_SCR_07_STRESS_NM_2 | 11876 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| CBCL_SCR_SYN_ANXDEP_R | 8 | 1 | 2.52 | 3.06 | 0 | 0 | 1 | 4 | 26 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_ANXDEP_T | 8 | 1 | 53.48 | 5.96 | 50 | 50 | 50 | 54 | 100 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_ANXDEP_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_WITHDEP_R | 8 | 1 | 1.03 | 1.71 | 0 | 0 | 0 | 1 | 15 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_WITHDEP_T | 8 | 1 | 53.51 | 5.79 | 50 | 50 | 50 | 54 | 97 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_WITHDEP_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_SOMATIC_R | 8 | 1 | 1.49 | 1.95 | 0 | 0 | 1 | 2 | 16 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOMATIC_T | 8 | 1 | 54.88 | 6.05 | 50 | 50 | 53 | 57 | 88 | ▇▂▁▁▁ |
| CBCL_SCR_SYN_SOMATIC_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_SOCIAL_R | 8 | 1 | 1.62 | 2.28 | 0 | 0 | 1 | 2 | 18 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOCIAL_T | 8 | 1 | 52.80 | 4.74 | 50 | 50 | 51 | 53 | 90 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOCIAL_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_THOUGHT_R | 8 | 1 | 1.62 | 2.19 | 0 | 0 | 1 | 2 | 18 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_THOUGHT_T | 8 | 1 | 53.79 | 5.90 | 50 | 50 | 51 | 54 | 84 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_THOUGHT_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_ATTENTION_R | 8 | 1 | 2.98 | 3.49 | 0 | 0 | 2 | 5 | 20 | ▇▂▁▁▁ |
| CBCL_SCR_SYN_ATTENTION_T | 8 | 1 | 53.90 | 6.17 | 50 | 50 | 51 | 55 | 100 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_ATTENTION_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_RULEBREAK_R | 8 | 1 | 1.19 | 1.86 | 0 | 0 | 0 | 2 | 20 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_RULEBREAK_T | 8 | 1 | 52.78 | 4.90 | 50 | 50 | 50 | 53 | 84 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_RULEBREAK_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_AGGRESSIVE_R | 8 | 1 | 3.26 | 4.35 | 0 | 0 | 2 | 5 | 36 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_AGGRESSIVE_T | 8 | 1 | 52.83 | 5.53 | 50 | 50 | 50 | 53 | 100 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_AGGRESSIVE_NM | 8 | 1 | 0.00 | 0.01 | 0 | 0 | 0 | 0 | 1 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_INTERNAL_R | 8 | 1 | 5.05 | 5.53 | 0 | 1 | 3 | 7 | 51 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_INTERNAL_T | 8 | 1 | 48.45 | 10.64 | 33 | 41 | 48 | 56 | 93 | ▇▇▃▁▁ |
| CBCL_SCR_SYN_INTERNAL_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_EXTERNAL_R | 8 | 1 | 4.45 | 5.86 | 0 | 0 | 2 | 6 | 49 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_EXTERNAL_T | 8 | 1 | 45.72 | 10.33 | 33 | 34 | 44 | 53 | 84 | ▇▇▃▁▁ |
| CBCL_SCR_SYN_EXTERNAL_NM | 8 | 1 | 0.00 | 0.01 | 0 | 0 | 0 | 0 | 1 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_TOTPROB_R | 8 | 1 | 18.18 | 17.97 | 0 | 5 | 13 | 25 | 139 | ▇▂▁▁▁ |
| CBCL_SCR_SYN_TOTPROB_T | 8 | 1 | 45.85 | 11.34 | 24 | 38 | 45 | 53 | 83 | ▃▇▆▂▁ |
| CBCL_SCR_SYN_TOTPROB_NM | 8 | 1 | 0.00 | 0.01 | 0 | 0 | 0 | 0 | 1 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_DEPRESS_R | 8 | 1 | 1.27 | 2.01 | 0 | 0 | 0 | 2 | 19 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_DEPRESS_T | 8 | 1 | 53.60 | 5.73 | 50 | 50 | 50 | 56 | 89 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_DEPRESS_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_ANXDISORD_R | 8 | 1 | 2.06 | 2.43 | 0 | 0 | 1 | 3 | 17 | ▇▂▁▁▁ |
| CBCL_SCR_DSM5_ANXDISORD_T | 8 | 1 | 53.49 | 6.13 | 50 | 50 | 51 | 54 | 97 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_ANXDISORD_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_SOMATICPR_R | 8 | 1 | 1.08 | 1.51 | 0 | 0 | 0 | 2 | 11 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_SOMATICPR_T | 8 | 1 | 55.46 | 6.62 | 50 | 50 | 50 | 61 | 90 | ▇▂▁▁▁ |
| CBCL_SCR_DSM5_SOMATICPR_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_ADHD_R | 8 | 1 | 2.62 | 2.97 | 0 | 0 | 2 | 4 | 14 | ▇▃▂▁▁ |
| CBCL_SCR_DSM5_ADHD_T | 8 | 1 | 53.23 | 5.63 | 50 | 50 | 50 | 55 | 80 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_ADHD_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_OPPOSIT_R | 8 | 1 | 1.77 | 2.04 | 0 | 0 | 1 | 3 | 10 | ▇▂▁▁▁ |
| CBCL_SCR_DSM5_OPPOSIT_T | 8 | 1 | 53.47 | 5.41 | 50 | 50 | 51 | 55 | 80 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_OPPOSIT_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_CONDUCT_R | 8 | 1 | 1.28 | 2.36 | 0 | 0 | 0 | 2 | 25 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_CONDUCT_T | 8 | 1 | 53.03 | 5.53 | 50 | 50 | 50 | 54 | 89 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_CONDUCT_NM | 8 | 1 | 0.00 | 0.01 | 0 | 0 | 0 | 0 | 1 | ▇▁▁▁▁ |
| CBCL_SCR_07_SCT_R | 8 | 1 | 0.52 | 1.00 | 0 | 0 | 0 | 1 | 8 | ▇▁▁▁▁ |
| CBCL_SCR_07_SCT_T | 8 | 1 | 53.02 | 5.41 | 50 | 50 | 50 | 56 | 80 | ▇▁▁▁▁ |
| CBCL_SCR_07_SCT_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_07_OCD_R | 8 | 1 | 1.34 | 1.82 | 0 | 0 | 1 | 2 | 14 | ▇▂▁▁▁ |
| CBCL_SCR_07_OCD_T | 8 | 1 | 53.69 | 6.12 | 50 | 50 | 51 | 55 | 94 | ▇▁▁▁▁ |
| CBCL_SCR_07_OCD_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_07_STRESS_R | 8 | 1 | 2.90 | 3.35 | 0 | 0 | 2 | 4 | 24 | ▇▂▁▁▁ |
| CBCL_SCR_07_STRESS_T | 8 | 1 | 53.32 | 6.00 | 50 | 50 | 50 | 54 | 93 | ▇▁▁▁▁ |
| CBCL_SCR_07_STRESS_NM | 8 | 1 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
CBCL_sum_scores %>% filter(EVENTNAME =="2_year_follow_up_y_arm_1") %>% dplyr::select(-1:-8) %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 81 |
| _______________________ | |
| Column type frequency: | |
| character | 20 |
| logical | 1 |
| numeric | 60 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| CBCL_SCR_SYN_ANXDEP_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_WITHDEP_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_SOMATIC_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_SOCIAL_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_THOUGHT_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_ATTENTION_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_RULEBREAK_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_AGGRESSIVE_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_INTERNAL_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_EXTERNAL_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_SYN_TOTPROB_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_DSM5_DEPRESS_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_DSM5_ANXDISORD_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_DSM5_SOMATICPR_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_DSM5_ADHD_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_DSM5_OPPOSIT_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_DSM5_CONDUCT_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_07_SCT_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_07_OCD_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
| CBCL_SCR_07_STRESS_M | 0 | 1 | 0 | 0 | 10414 | 1 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| CBCL_SCR_07_STRESS_NM_2 | 10414 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| CBCL_SCR_SYN_ANXDEP_R | 2329 | 0.78 | 2.32 | 2.96 | 0 | 0 | 1 | 3 | 22 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_ANXDEP_T | 2329 | 0.78 | 53.20 | 5.77 | 50 | 50 | 50 | 54 | 92 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_ANXDEP_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_WITHDEP_R | 2329 | 0.78 | 1.22 | 1.91 | 0 | 0 | 0 | 2 | 16 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_WITHDEP_T | 2329 | 0.78 | 53.47 | 5.73 | 50 | 50 | 50 | 54 | 100 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_WITHDEP_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_SOMATIC_R | 2329 | 0.78 | 1.40 | 1.92 | 0 | 0 | 1 | 2 | 16 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOMATIC_T | 2329 | 0.78 | 54.59 | 5.90 | 50 | 50 | 53 | 57 | 88 | ▇▂▁▁▁ |
| CBCL_SCR_SYN_SOMATIC_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_SOCIAL_R | 2329 | 0.78 | 1.31 | 2.07 | 0 | 0 | 0 | 2 | 17 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOCIAL_T | 2329 | 0.78 | 52.56 | 4.73 | 50 | 50 | 50 | 53 | 89 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOCIAL_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_THOUGHT_R | 2329 | 0.78 | 1.44 | 2.07 | 0 | 0 | 1 | 2 | 22 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_THOUGHT_T | 2329 | 0.78 | 53.61 | 5.72 | 50 | 50 | 51 | 55 | 89 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_THOUGHT_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_ATTENTION_R | 2329 | 0.78 | 2.70 | 3.31 | 0 | 0 | 1 | 4 | 19 | ▇▂▁▁▁ |
| CBCL_SCR_SYN_ATTENTION_T | 2329 | 0.78 | 53.47 | 5.58 | 50 | 50 | 51 | 55 | 97 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_ATTENTION_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_RULEBREAK_R | 2329 | 0.78 | 1.06 | 1.84 | 0 | 0 | 0 | 1 | 23 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_RULEBREAK_T | 2329 | 0.78 | 52.11 | 4.17 | 50 | 50 | 50 | 52 | 84 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_RULEBREAK_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_AGGRESSIVE_R | 2329 | 0.78 | 2.87 | 4.02 | 0 | 0 | 1 | 4 | 33 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_AGGRESSIVE_T | 2329 | 0.78 | 52.37 | 4.96 | 50 | 50 | 50 | 52 | 95 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_AGGRESSIVE_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_INTERNAL_R | 2329 | 0.78 | 4.94 | 5.62 | 0 | 1 | 3 | 7 | 50 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_INTERNAL_T | 2329 | 0.78 | 47.77 | 10.50 | 33 | 40 | 47 | 54 | 90 | ▇▇▃▁▁ |
| CBCL_SCR_SYN_INTERNAL_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_EXTERNAL_R | 2329 | 0.78 | 3.93 | 5.52 | 0 | 0 | 2 | 5 | 50 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_EXTERNAL_T | 2329 | 0.78 | 44.48 | 9.80 | 33 | 34 | 44 | 51 | 83 | ▇▅▂▁▁ |
| CBCL_SCR_SYN_EXTERNAL_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_SYN_TOTPROB_R | 2329 | 0.78 | 16.43 | 17.06 | 0 | 4 | 11 | 22 | 161 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_TOTPROB_T | 2329 | 0.78 | 44.77 | 11.27 | 24 | 37 | 45 | 52 | 88 | ▅▇▅▁▁ |
| CBCL_SCR_SYN_TOTPROB_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_DEPRESS_R | 2329 | 0.78 | 1.47 | 2.24 | 0 | 0 | 1 | 2 | 19 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_DEPRESS_T | 2329 | 0.78 | 53.75 | 5.88 | 50 | 50 | 51 | 56 | 89 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_DEPRESS_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_ANXDISORD_R | 2329 | 0.78 | 1.81 | 2.31 | 0 | 0 | 1 | 3 | 18 | ▇▂▁▁▁ |
| CBCL_SCR_DSM5_ANXDISORD_T | 2329 | 0.78 | 53.35 | 5.89 | 50 | 50 | 51 | 53 | 100 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_ANXDISORD_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_SOMATICPR_R | 2329 | 0.78 | 1.05 | 1.48 | 0 | 0 | 0 | 2 | 11 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_SOMATICPR_T | 2329 | 0.78 | 54.99 | 6.36 | 50 | 50 | 50 | 59 | 90 | ▇▂▁▁▁ |
| CBCL_SCR_DSM5_SOMATICPR_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_ADHD_R | 2329 | 0.78 | 2.29 | 2.78 | 0 | 0 | 1 | 4 | 14 | ▇▃▁▁▁ |
| CBCL_SCR_DSM5_ADHD_T | 2329 | 0.78 | 53.07 | 5.33 | 50 | 50 | 50 | 55 | 80 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_ADHD_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_OPPOSIT_R | 2329 | 0.78 | 1.60 | 1.95 | 0 | 0 | 1 | 3 | 10 | ▇▂▁▁▁ |
| CBCL_SCR_DSM5_OPPOSIT_T | 2329 | 0.78 | 53.07 | 5.03 | 50 | 50 | 51 | 55 | 80 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_OPPOSIT_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_DSM5_CONDUCT_R | 2329 | 0.78 | 1.13 | 2.23 | 0 | 0 | 0 | 1 | 25 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_CONDUCT_T | 2329 | 0.78 | 52.40 | 4.81 | 50 | 50 | 50 | 52 | 87 | ▇▁▁▁▁ |
| CBCL_SCR_DSM5_CONDUCT_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_07_SCT_R | 2329 | 0.78 | 0.50 | 0.97 | 0 | 0 | 0 | 1 | 8 | ▇▁▁▁▁ |
| CBCL_SCR_07_SCT_T | 2329 | 0.78 | 52.70 | 5.02 | 50 | 50 | 50 | 55 | 80 | ▇▁▁▁▁ |
| CBCL_SCR_07_SCT_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_07_OCD_R | 2329 | 0.78 | 1.24 | 1.75 | 0 | 0 | 1 | 2 | 16 | ▇▁▁▁▁ |
| CBCL_SCR_07_OCD_T | 2329 | 0.78 | 53.30 | 5.72 | 50 | 50 | 51 | 55 | 100 | ▇▁▁▁▁ |
| CBCL_SCR_07_OCD_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
| CBCL_SCR_07_STRESS_R | 2329 | 0.78 | 2.74 | 3.25 | 0 | 0 | 2 | 4 | 28 | ▇▁▁▁▁ |
| CBCL_SCR_07_STRESS_T | 2329 | 0.78 | 52.92 | 5.53 | 50 | 50 | 50 | 54 | 100 | ▇▁▁▁▁ |
| CBCL_SCR_07_STRESS_NM | 2329 | 0.78 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | ▁▁▇▁▁ |
CBCL_sum_score_select <- CBCL_sum_scores %>% dplyr::select(all_of(c("SUBJECTKEY","EVENTNAME",
"CBCL_SCR_SYN_ANXDEP_R", "CBCL_SCR_SYN_WITHDEP_R", "CBCL_SCR_SYN_SOMATIC_R", "CBCL_SCR_SYN_SOCIAL_R",
"CBCL_SCR_SYN_THOUGHT_R","CBCL_SCR_SYN_ATTENTION_R", "CBCL_SCR_SYN_RULEBREAK_R", "CBCL_SCR_SYN_AGGRESSIVE_R")))
all_sum_vars <-
plyr::join_all(list(Siteinfo, ASRPrecomputedSelected,BISBAS,UPPS,CBCL_sum_score_select,vision_idx,ACSselected),
by=c('SUBJECTKEY','EVENTNAME'), type='full') %>%
filter(visionProb != 1|is.na(visionProb)) %>% #remove subjects with eyesight problems
dplyr::select(-visionProb)%>%
filter(EVENTNAME %in% c("baseline_year_1_arm_1","2_year_follow_up_y_arm_1"))## select the event for analysis
## there are all NAs for event name of 42_month_follow_up_arm_1 and 30_month_follow_up_arm_1
all_sum_vars %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>% dplyr::select(-1:-2) %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 11845 |
| Number of columns | 55 |
| _______________________ | |
| Column type frequency: | |
| character | 6 |
| factor | 2 |
| numeric | 47 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 11845 | 0 |
| SRC_SUBJECT_ID | 0 | 1 | 16 | 16 | 0 | 11845 | 0 |
| INTERVIEW_DATE | 0 | 1 | 9 | 9 | 0 | 756 | 0 |
| SEX | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| EVENTNAME | 0 | 1 | 21 | 21 | 0 | 1 | 0 |
| SITE_ID_L | 0 | 1 | 6 | 6 | 0 | 22 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| RACE_ETHNICITY | 2 | 1 | FALSE | 5 | Whi: 6171, His: 2403, Bla: 1775, Oth: 1243 |
| REL_FAMILY_ID | 0 | 1 | FALSE | 9832 | 373: 5, 749: 4, 11: 3, 400: 3 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| INTERVIEW_AGE | 0 | 1.00 | 118.98 | 7.49 | 107.00 | 112.00 | 119.00 | 126.00 | 133.00 | ▇▆▆▆▆ |
| SCHED_DELAY | 13 | 1.00 | 7.00 | 0.10 | 1.00 | 7.00 | 7.00 | 7.00 | 7.00 | ▁▁▁▁▇ |
| SCHED_HYBRID | 11845 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| ASR_SCR_PERSTR_R | 5 | 1.00 | 16.52 | 3.52 | 0.00 | 15.00 | 17.00 | 19.00 | 22.00 | ▁▁▂▇▇ |
| ASR_SCR_ANXDEP_R | 5 | 1.00 | 5.04 | 4.93 | 0.00 | 1.00 | 4.00 | 7.00 | 35.00 | ▇▂▁▁▁ |
| ASR_SCR_WITHDRAWN_R | 5 | 1.00 | 1.56 | 2.11 | 0.00 | 0.00 | 1.00 | 2.00 | 17.00 | ▇▁▁▁▁ |
| ASR_SCR_SOMATIC_R | 5 | 1.00 | 2.94 | 3.18 | 0.00 | 1.00 | 2.00 | 4.00 | 23.00 | ▇▂▁▁▁ |
| ASR_SCR_THOUGHT_R | 5 | 1.00 | 1.43 | 1.85 | 0.00 | 0.00 | 1.00 | 2.00 | 18.00 | ▇▁▁▁▁ |
| ASR_SCR_ATTENTION_R | 5 | 1.00 | 4.63 | 4.26 | 0.00 | 1.00 | 4.00 | 7.00 | 27.00 | ▇▃▁▁▁ |
| ASR_SCR_AGGRESSIVE_R | 5 | 1.00 | 3.35 | 3.57 | 0.00 | 1.00 | 2.00 | 5.00 | 25.00 | ▇▂▁▁▁ |
| ASR_SCR_RULEBREAK_R | 5 | 1.00 | 1.19 | 1.85 | 0.00 | 0.00 | 0.00 | 2.00 | 18.00 | ▇▁▁▁▁ |
| ASR_SCR_INTRUSIVE_R | 5 | 1.00 | 1.00 | 1.43 | 0.00 | 0.00 | 0.00 | 2.00 | 10.00 | ▇▁▁▁▁ |
| ASR_SCR_INTERNAL_R | 5 | 1.00 | 9.53 | 8.74 | 0.00 | 3.00 | 7.00 | 13.00 | 70.00 | ▇▂▁▁▁ |
| ASR_SCR_EXTERNAL_R | 5 | 1.00 | 5.55 | 5.62 | 0.00 | 1.00 | 4.00 | 8.00 | 39.00 | ▇▂▁▁▁ |
| ASR_SCR_TOTPROB_R | 4 | 1.00 | 21.15 | 17.96 | 0.00 | 8.00 | 16.00 | 29.00 | 154.00 | ▇▂▁▁▁ |
| ASR_SCR_DEPRESS_R | 5 | 1.00 | 3.97 | 3.66 | 0.00 | 1.00 | 3.00 | 6.00 | 28.00 | ▇▂▁▁▁ |
| ASR_SCR_ANXDISORD_R | 5 | 1.00 | 3.76 | 2.56 | 0.00 | 2.00 | 3.00 | 5.00 | 12.00 | ▇▆▆▂▁ |
| ASR_SCR_SOMATICPR_R | 5 | 1.00 | 1.91 | 2.37 | 0.00 | 0.00 | 1.00 | 3.00 | 18.00 | ▇▂▁▁▁ |
| ASR_SCR_AVOIDANT_R | 5 | 1.00 | 2.01 | 2.15 | 0.00 | 0.00 | 1.00 | 3.00 | 14.00 | ▇▃▁▁▁ |
| ASR_SCR_ADHD_R | 5 | 1.00 | 3.80 | 3.64 | 0.00 | 1.00 | 3.00 | 6.00 | 25.00 | ▇▂▁▁▁ |
| ASR_SCR_ANTISOCIAL_R | 5 | 1.00 | 2.11 | 2.49 | 0.00 | 0.00 | 1.00 | 3.00 | 22.00 | ▇▁▁▁▁ |
| ASR_SCR_INATTENTION_R | 5 | 1.00 | 2.35 | 2.35 | 0.00 | 0.00 | 2.00 | 4.00 | 14.00 | ▇▃▁▁▁ |
| ASR_SCR_HYPERACTIVE_R | 5 | 1.00 | 1.46 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 12.00 | ▇▂▁▁▁ |
| BISAvg | 22 | 1.00 | 1.38 | 0.71 | 0.00 | 0.75 | 1.25 | 2.00 | 3.00 | ▃▅▇▃▂ |
| BASRRAvg | 23 | 1.00 | 2.20 | 0.62 | 0.00 | 1.75 | 2.25 | 2.75 | 3.00 | ▁▁▅▆▇ |
| BASDriveAvg | 23 | 1.00 | 1.03 | 0.77 | 0.00 | 0.50 | 1.00 | 1.50 | 3.00 | ▇▆▅▂▂ |
| BASFunAvg | 23 | 1.00 | 1.43 | 0.66 | 0.00 | 1.00 | 1.50 | 1.75 | 3.00 | ▂▅▇▃▂ |
| BASAllAvg | 23 | 1.00 | 1.55 | 0.54 | 0.00 | 1.17 | 1.50 | 1.92 | 3.00 | ▁▅▇▅▂ |
| UPPS_Y_SS_NEGATIVE_URGENCY | 23 | 1.00 | 8.49 | 2.65 | 4.00 | 7.00 | 8.00 | 10.00 | 16.00 | ▆▇▇▂▁ |
| UPPS_Y_SS_LACK_OF_PLANNING | 23 | 1.00 | 7.74 | 2.38 | 4.00 | 6.00 | 8.00 | 9.00 | 16.00 | ▆▇▅▁▁ |
| UPPS_Y_SS_SENSATION_SEEKING | 23 | 1.00 | 9.77 | 2.68 | 4.00 | 8.00 | 10.00 | 12.00 | 16.00 | ▂▅▇▃▂ |
| UPPS_Y_SS_POSITIVE_URGENCY | 23 | 1.00 | 7.99 | 2.96 | 4.00 | 6.00 | 8.00 | 10.00 | 16.00 | ▇▆▆▂▁ |
| UPPS_Y_SS_LACK_OF_PERSEVERANCE | 23 | 1.00 | 7.04 | 2.25 | 4.00 | 5.00 | 7.00 | 8.00 | 16.00 | ▇▆▃▁▁ |
| CBCL_SCR_SYN_ANXDEP_R | 8 | 1.00 | 2.52 | 3.07 | 0.00 | 0.00 | 1.00 | 4.00 | 26.00 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_WITHDEP_R | 8 | 1.00 | 1.04 | 1.71 | 0.00 | 0.00 | 0.00 | 1.00 | 15.00 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOMATIC_R | 8 | 1.00 | 1.50 | 1.95 | 0.00 | 0.00 | 1.00 | 2.00 | 16.00 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOCIAL_R | 8 | 1.00 | 1.63 | 2.28 | 0.00 | 0.00 | 1.00 | 2.00 | 18.00 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_THOUGHT_R | 8 | 1.00 | 1.62 | 2.20 | 0.00 | 0.00 | 1.00 | 2.00 | 18.00 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_ATTENTION_R | 8 | 1.00 | 2.98 | 3.49 | 0.00 | 0.00 | 2.00 | 5.00 | 20.00 | ▇▂▁▁▁ |
| CBCL_SCR_SYN_RULEBREAK_R | 8 | 1.00 | 1.19 | 1.86 | 0.00 | 0.00 | 0.00 | 2.00 | 20.00 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_AGGRESSIVE_R | 8 | 1.00 | 3.26 | 4.36 | 0.00 | 0.00 | 2.00 | 5.00 | 36.00 | ▇▁▁▁▁ |
| ABCD_SVS01_ID | 0 | 1.00 | 50523.16 | 6430.86 | 39373.00 | 44951.00 | 50525.00 | 56092.00 | 61660.00 | ▇▇▇▇▇ |
| SNELLEN_AID_Y | 21 | 1.00 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| SNELLEN_AIDPRES_Y | 8987 | 0.24 | 0.62 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
| SNELLEN_VA_Y | 28 | 1.00 | 6.84 | 1.44 | 2.00 | 6.00 | 7.00 | 8.00 | 11.00 | ▁▃▇▆▁ |
| VIS_FLG | 11083 | 0.06 | 1.00 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▇▁▁ |
| ACS_RAKED_PROPENSITY_SCORE | 0 | 1.00 | 691.25 | 350.96 | 161.36 | 448.94 | 619.31 | 821.72 | 1778.92 | ▅▇▂▂▁ |
all_sum_vars %>% filter(EVENTNAME =="2_year_follow_up_y_arm_1") %>% dplyr::select(-1:-2) %>%
skimr::skim()
| Name | Piped data |
| Number of rows | 10387 |
| Number of columns | 55 |
| _______________________ | |
| Column type frequency: | |
| character | 6 |
| factor | 2 |
| numeric | 47 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 10387 | 0 |
| SRC_SUBJECT_ID | 0 | 1 | 16 | 16 | 0 | 10387 | 0 |
| INTERVIEW_DATE | 0 | 1 | 9 | 9 | 0 | 788 | 0 |
| SEX | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| EVENTNAME | 0 | 1 | 24 | 24 | 0 | 1 | 0 |
| SITE_ID_L | 0 | 1 | 6 | 6 | 0 | 21 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| RACE_ETHNICITY | 10387 | 0 | FALSE | 0 | Whi: 0, Bla: 0, His: 0, Asi: 0 |
| REL_FAMILY_ID | 10387 | 0 | FALSE | 0 | 0: 0, 1: 0, 3: 0, 4: 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| INTERVIEW_AGE | 0 | 1.00 | 144.04 | 7.95 | 127 | 137.00 | 144.00 | 151.00 | 168 | ▅▇▇▅▁ |
| SCHED_DELAY | 0 | 1.00 | 7.55 | 0.89 | 7 | 7.00 | 7.00 | 9.00 | 9 | ▇▁▁▁▃ |
| SCHED_HYBRID | 7674 | 0.26 | 0.49 | 0.50 | 0 | 0.00 | 0.00 | 1.00 | 1 | ▇▁▁▁▇ |
| ASR_SCR_PERSTR_R | 57 | 0.99 | 16.10 | 3.70 | 0 | 14.00 | 17.00 | 19.00 | 22 | ▁▁▃▇▇ |
| ASR_SCR_ANXDEP_R | 57 | 0.99 | 5.03 | 4.96 | 0 | 1.00 | 4.00 | 7.00 | 34 | ▇▂▁▁▁ |
| ASR_SCR_WITHDRAWN_R | 57 | 0.99 | 1.56 | 2.06 | 0 | 0.00 | 1.00 | 2.00 | 18 | ▇▁▁▁▁ |
| ASR_SCR_SOMATIC_R | 57 | 0.99 | 2.95 | 3.17 | 0 | 1.00 | 2.00 | 4.00 | 22 | ▇▂▁▁▁ |
| ASR_SCR_THOUGHT_R | 57 | 0.99 | 1.28 | 1.71 | 0 | 0.00 | 1.00 | 2.00 | 17 | ▇▁▁▁▁ |
| ASR_SCR_ATTENTION_R | 57 | 0.99 | 4.59 | 4.22 | 0 | 1.00 | 3.00 | 7.00 | 29 | ▇▃▁▁▁ |
| ASR_SCR_AGGRESSIVE_R | 57 | 0.99 | 3.13 | 3.37 | 0 | 1.00 | 2.00 | 5.00 | 26 | ▇▂▁▁▁ |
| ASR_SCR_RULEBREAK_R | 57 | 0.99 | 1.06 | 1.73 | 0 | 0.00 | 0.00 | 2.00 | 21 | ▇▁▁▁▁ |
| ASR_SCR_INTRUSIVE_R | 57 | 0.99 | 0.95 | 1.39 | 0 | 0.00 | 0.00 | 1.00 | 11 | ▇▁▁▁▁ |
| ASR_SCR_INTERNAL_R | 57 | 0.99 | 9.55 | 8.72 | 0 | 3.00 | 7.00 | 13.00 | 65 | ▇▂▁▁▁ |
| ASR_SCR_EXTERNAL_R | 57 | 0.99 | 5.14 | 5.30 | 0 | 1.00 | 4.00 | 7.00 | 57 | ▇▁▁▁▁ |
| ASR_SCR_TOTPROB_R | 57 | 0.99 | 20.55 | 17.46 | 0 | 8.00 | 16.00 | 28.00 | 133 | ▇▂▁▁▁ |
| ASR_SCR_DEPRESS_R | 57 | 0.99 | 4.00 | 3.67 | 0 | 1.00 | 3.00 | 6.00 | 25 | ▇▂▁▁▁ |
| ASR_SCR_ANXDISORD_R | 57 | 0.99 | 3.64 | 2.53 | 0 | 2.00 | 3.00 | 5.00 | 12 | ▇▆▆▁▁ |
| ASR_SCR_SOMATICPR_R | 57 | 0.99 | 1.92 | 2.35 | 0 | 0.00 | 1.00 | 3.00 | 17 | ▇▂▁▁▁ |
| ASR_SCR_AVOIDANT_R | 57 | 0.99 | 1.98 | 2.11 | 0 | 0.00 | 1.00 | 3.00 | 13 | ▇▃▁▁▁ |
| ASR_SCR_ADHD_R | 57 | 0.99 | 3.68 | 3.54 | 0 | 1.00 | 3.00 | 5.00 | 23 | ▇▃▁▁▁ |
| ASR_SCR_ANTISOCIAL_R | 57 | 0.99 | 2.00 | 2.35 | 0 | 0.00 | 1.00 | 3.00 | 30 | ▇▁▁▁▁ |
| ASR_SCR_INATTENTION_R | 57 | 0.99 | 2.35 | 2.33 | 0 | 0.00 | 2.00 | 4.00 | 14 | ▇▃▁▁▁ |
| ASR_SCR_HYPERACTIVE_R | 57 | 0.99 | 1.33 | 1.66 | 0 | 0.00 | 1.00 | 2.00 | 12 | ▇▁▁▁▁ |
| BISAvg | 30 | 1.00 | 1.24 | 0.71 | 0 | 0.75 | 1.25 | 1.75 | 3 | ▅▆▇▃▂ |
| BASRRAvg | 30 | 1.00 | 1.96 | 0.65 | 0 | 1.50 | 2.00 | 2.50 | 3 | ▁▂▇▇▇ |
| BASDriveAvg | 30 | 1.00 | 0.92 | 0.70 | 0 | 0.50 | 0.75 | 1.25 | 3 | ▇▆▅▁▁ |
| BASFunAvg | 30 | 1.00 | 1.14 | 0.65 | 0 | 0.75 | 1.00 | 1.50 | 3 | ▆▇▇▂▁ |
| BASAllAvg | 30 | 1.00 | 1.34 | 0.53 | 0 | 1.00 | 1.33 | 1.67 | 3 | ▂▇▇▃▁ |
| UPPS_Y_SS_NEGATIVE_URGENCY | 30 | 1.00 | 7.76 | 2.34 | 4 | 6.00 | 8.00 | 9.00 | 16 | ▇▇▇▁▁ |
| UPPS_Y_SS_LACK_OF_PLANNING | 30 | 1.00 | 7.77 | 2.24 | 4 | 6.00 | 8.00 | 9.00 | 16 | ▅▇▃▁▁ |
| UPPS_Y_SS_SENSATION_SEEKING | 29 | 1.00 | 9.49 | 2.68 | 3 | 8.00 | 9.00 | 11.00 | 16 | ▂▇▇▇▂ |
| UPPS_Y_SS_POSITIVE_URGENCY | 30 | 1.00 | 7.39 | 2.67 | 4 | 5.00 | 8.00 | 9.00 | 16 | ▇▆▅▁▁ |
| UPPS_Y_SS_LACK_OF_PERSEVERANCE | 30 | 1.00 | 6.98 | 2.25 | 4 | 5.00 | 7.00 | 8.00 | 16 | ▇▇▃▁▁ |
| CBCL_SCR_SYN_ANXDEP_R | 2327 | 0.78 | 2.32 | 2.96 | 0 | 0.00 | 1.00 | 3.00 | 22 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_WITHDEP_R | 2327 | 0.78 | 1.21 | 1.91 | 0 | 0.00 | 0.00 | 2.00 | 16 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOMATIC_R | 2327 | 0.78 | 1.40 | 1.91 | 0 | 0.00 | 1.00 | 2.00 | 16 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_SOCIAL_R | 2327 | 0.78 | 1.31 | 2.07 | 0 | 0.00 | 0.00 | 2.00 | 17 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_THOUGHT_R | 2327 | 0.78 | 1.44 | 2.07 | 0 | 0.00 | 1.00 | 2.00 | 22 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_ATTENTION_R | 2327 | 0.78 | 2.70 | 3.31 | 0 | 0.00 | 1.00 | 4.00 | 19 | ▇▂▁▁▁ |
| CBCL_SCR_SYN_RULEBREAK_R | 2327 | 0.78 | 1.06 | 1.84 | 0 | 0.00 | 0.00 | 1.00 | 23 | ▇▁▁▁▁ |
| CBCL_SCR_SYN_AGGRESSIVE_R | 2327 | 0.78 | 2.87 | 4.02 | 0 | 0.00 | 1.00 | 4.00 | 33 | ▇▁▁▁▁ |
| ABCD_SVS01_ID | 0 | 1.00 | 50511.20 | 6441.84 | 39372 | 44933.00 | 50506.00 | 56094.00 | 61661 | ▇▇▇▇▇ |
| SNELLEN_AID_Y | 1626 | 0.84 | 0.28 | 0.45 | 0 | 0.00 | 0.00 | 1.00 | 1 | ▇▁▁▁▃ |
| SNELLEN_AIDPRES_Y | 7945 | 0.24 | 0.67 | 0.47 | 0 | 0.00 | 1.00 | 1.00 | 1 | ▃▁▁▁▇ |
| SNELLEN_VA_Y | 1625 | 0.84 | 7.11 | 1.53 | 2 | 6.00 | 7.00 | 8.00 | 11 | ▁▃▇▇▁ |
| VIS_FLG | 9840 | 0.05 | 1.00 | 0.00 | 1 | 1.00 | 1.00 | 1.00 | 1 | ▁▁▇▁▁ |
| ACS_RAKED_PROPENSITY_SCORE | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA |
make sure that there are no members from the same family at different sites
all_sum_vars_baseline <- all_sum_vars %>% filter(EVENTNAME =="baseline_year_1_arm_1")
all_sum_vars_baseline %>% count(SITE_ID_L)
## SITE_ID_L n
## 1 site01 405
## 2 site02 558
## 3 site03 629
## 4 site04 745
## 5 site05 377
## 6 site06 580
## 7 site07 339
## 8 site08 350
## 9 site09 433
## 10 site10 736
## 11 site11 448
## 12 site12 600
## 13 site13 726
## 14 site14 606
## 15 site15 457
## 16 site16 1010
## 17 site17 577
## 18 site18 384
## 19 site19 549
## 20 site20 701
## 21 site21 599
## 22 site22 36
# check if there are members from the same family at different sites. There are 6 of them.
all_sum_vars_baseline %>%
drop_na(SITE_ID_L) %>%
filter(SITE_ID_L != "site22") %>%
count(REL_FAMILY_ID, SITE_ID_L) %>%
spread(SITE_ID_L, n, fill = 0) %>%
dplyr::select(-REL_FAMILY_ID) %>%
as.matrix %>%
crossprod
## site01 site02 site03 site04 site05 site06 site07 site08 site09 site10
## site01 495 0 0 0 0 0 0 0 0 0
## site02 0 1048 0 0 0 0 0 0 0 0
## site03 0 0 751 0 0 0 0 0 0 0
## site04 0 0 0 955 0 0 0 0 0 0
## site05 0 0 0 0 473 0 0 0 0 0
## site06 0 0 0 0 0 688 0 0 0 0
## site07 0 0 0 0 0 0 425 0 0 0
## site08 0 0 0 0 0 0 0 434 0 0
## site09 0 0 0 0 0 0 0 0 479 0
## site10 0 0 0 0 0 0 0 0 0 910
## site11 0 0 0 0 0 0 0 0 0 0
## site12 0 0 0 0 0 0 0 0 0 0
## site13 0 0 0 0 0 0 0 0 0 0
## site14 0 0 0 0 0 0 0 0 0 0
## site15 0 0 0 0 0 0 0 0 0 0
## site16 0 0 0 0 0 0 0 0 0 0
## site17 0 0 0 0 0 0 0 0 0 0
## site18 0 0 0 0 0 0 0 0 0 0
## site19 0 0 0 0 0 0 0 0 0 0
## site20 0 0 0 0 0 0 0 0 0 0
## site21 0 0 0 0 0 0 0 0 0 0
## site11 site12 site13 site14 site15 site16 site17 site18 site19 site20
## site01 0 0 0 0 0 0 0 0 0 0
## site02 0 0 0 0 0 0 0 0 0 0
## site03 0 0 0 0 0 0 0 0 0 0
## site04 0 0 0 0 0 0 0 0 0 0
## site05 0 0 0 0 0 0 0 0 0 0
## site06 0 0 0 0 0 0 0 0 0 0
## site07 0 0 0 0 0 0 0 0 0 0
## site08 0 0 0 0 0 0 0 0 0 0
## site09 0 0 0 0 0 0 0 0 0 0
## site10 0 0 0 0 0 0 0 0 0 0
## site11 562 0 0 0 0 0 0 0 0 0
## site12 0 746 0 0 0 0 0 0 0 0
## site13 0 0 888 0 0 0 0 0 0 0
## site14 0 0 0 1106 0 0 0 0 0 0
## site15 0 0 0 0 549 0 0 0 0 0
## site16 0 0 0 0 0 1394 0 0 0 0
## site17 0 0 0 0 0 0 697 0 0 0
## site18 0 0 0 0 0 0 0 448 0 0
## site19 0 0 0 0 0 0 0 0 1015 0
## site20 0 0 0 0 0 0 0 0 0 1187
## site21 0 0 0 0 0 0 0 0 0 0
## site21
## site01 0
## site02 0
## site03 0
## site04 0
## site05 0
## site06 0
## site07 0
## site08 0
## site09 0
## site10 0
## site11 0
## site12 0
## site13 0
## site14 0
## site15 0
## site16 0
## site17 0
## site18 0
## site19 0
## site20 0
## site21 723
#below will remove those 6
all_sum_vars_baseline_no_dup <- all_sum_vars_baseline %>%
drop_na(SITE_ID_L) %>%
filter(SITE_ID_L != "site22") %>%
group_by(REL_FAMILY_ID) %>%
nest(SITE_ID_L, .key="SITE_ID_L") %>%
mutate(dup = ifelse(length(c(unlist(SITE_ID_L)))==1,0,
ifelse(length(unique(c(unlist(SITE_ID_L)))) > 1,1,0))) %>%
unnest(SITE_ID_L) %>%
ungroup()
## Warning: Supplying `...` without names was deprecated in tidyr 1.0.0.
## ℹ Please specify a name for each selection.
## ℹ Did you want `SITE_ID_L = SITE_ID_L`?
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
family_exclude <- unique(all_sum_vars_baseline_no_dup$REL_FAMILY_ID[which(all_sum_vars_baseline_no_dup$dup==1)])
all_sum_vars_no_dup <- all_sum_vars%>%
filter(!REL_FAMILY_ID %in% family_exclude)%>%
drop_na(SITE_ID_L) %>%
filter(SITE_ID_L != "site22")
Samples: REL_FAMILY_ID (9856 Levels) SITE_ID_L (need to remove 22nd site. having too few subjects) ALSO make sure about EVENTNAME
Target: Factor analysis of cognitive abilities: gfactor
72 Features:
SEX Ethnicity - Black, Hispanic, Asian, Other
Features by categories:
Parent Mental Health(9): ASR_SCR_PERSTR_R ASR_SCR_ANXDEP_R ASR_SCR_WITHDRAWN_R ASR_SCR_SOMATIC_R ASR_SCR_THOUGHT_R ASR_SCR_ATTENTION_R ASR_SCR_AGGRESSIVE_R ASR_SCR_RULEBREAK_R ASR_SCR_INTRUSIVE_R
Child Personality(9): BISAvg BASRRAvg BASDriveAvg BASFunAvg UPPS_Y_SS_NEGATIVE_URGENCY UPPS_Y_SS_LACK_OF_PLANNING UPPS_Y_SS_SENSATION_SEEKING UPPS_Y_SS_POSITIVE_URGENCY UPPS_Y_SS_LACK_OF_PERSEVERANCE
CBCL sum scores
“CBCL_SCR_SYN_ANXDEP_R” “CBCL_SCR_SYN_WITHDEP_R” “CBCL_SCR_SYN_SOMATIC_R” “CBCL_SCR_SYN_SOCIAL_R” “CBCL_SCR_SYN_THOUGHT_R” “CBCL_SCR_SYN_ATTENTION_R” “CBCL_SCR_SYN_RULEBREAK_R” “CBCL_SCR_SYN_AGGRESSIVE_R”
features: Child Personality(9) CBCL sum scores(8) Parent Mental Health(9)
Set up vector of names based on different categories of features
subj_info <- c("SUBJECTKEY","EVENTNAME","SITE_ID_L")
## features name vec
CBCL_sum_scores_names <-c("CBCL_SCR_SYN_ANXDEP_R","CBCL_SCR_SYN_WITHDEP_R","CBCL_SCR_SYN_SOMATIC_R","CBCL_SCR_SYN_SOCIAL_R","CBCL_SCR_SYN_THOUGHT_R","CBCL_SCR_SYN_ATTENTION_R","CBCL_SCR_SYN_RULEBREAK_R","CBCL_SCR_SYN_AGGRESSIVE_R")
Parent_Mental_Health_names<- c("ASR_SCR_PERSTR_R","ASR_SCR_ANXDEP_R","ASR_SCR_WITHDRAWN_R","ASR_SCR_SOMATIC_R","ASR_SCR_THOUGHT_R","ASR_SCR_ATTENTION_R","ASR_SCR_AGGRESSIVE_R","ASR_SCR_RULEBREAK_R","ASR_SCR_INTRUSIVE_R")
Child_Personality_names<- c("BISAvg","BASRRAvg","BASDriveAvg","BASFunAvg","UPPS_Y_SS_NEGATIVE_URGENCY","UPPS_Y_SS_LACK_OF_PLANNING","UPPS_Y_SS_SENSATION_SEEKING","UPPS_Y_SS_POSITIVE_URGENCY","UPPS_Y_SS_LACK_OF_PERSEVERANCE")
## features related to subject information
features <- c(CBCL_sum_scores_names,Parent_Mental_Health_names,Child_Personality_names)
all_features_no_dup <- all_sum_vars_no_dup %>% dplyr::select(all_of(subj_info),all_of(CBCL_sum_scores_names),all_of(Parent_Mental_Health_names),all_of(Child_Personality_names))
### select the data table with subject information and variable names
all_selected_no_dup <- all_sum_vars_no_dup %>% dplyr::select(all_of(subj_info),all_of(CBCL_sum_scores_names),all_of(Parent_Mental_Health_names),all_of(Child_Personality_names))
Making data splits based on the site.
site_col <- all_selected_no_dup %>%
distinct(SITE_ID_L) %>%
arrange(SITE_ID_L)
site_list <- as.list(site_col$SITE_ID_L)
site_char <- as.character(unlist(site_col$SITE_ID_L))
split_func <- function(site,data_input)
{
train_indices <- which(data_input$SITE_ID_L != site)
test_indices <- which(data_input$SITE_ID_L == site)
indices <-
list(analysis = train_indices,
assessment = test_indices)
split <- make_splits(indices, data_input)
return(split)}
split_list <- purrr::map(site_list, ~split_func(.x,data_input =all_selected_no_dup ))
names(split_list) <- site_char
process features
The following data processing function does not include combat and removal of outliers. What is does is only scale baseline train and test data and then scale the followup train and test data with mean and standard deviation from baseline respectively.
processed_psychopathology_scale_seperate_features_only <- purrr::map(.x= split_list,
~data_processing_cross_sites_seperate_no_combat_no_iqr(split_input = .))
baseline_train_gfactor <- purrr::map(gfactor_list,"output_train_baseline")
baseline_test_gfactor <- purrr::map(gfactor_list,"output_test_baseline")
followup_train_gfactor <- purrr::map(gfactor_list,"output_train_followup")
followup_test_gfactor <- purrr::map(gfactor_list,"output_test_followup")
Join the loaded data
processed_train_baseline_scale_seperate_all <-processed_psychopathology_scale_seperate_features_only %>%
purrr::map(.,~filter(.,EVENTNAME=="baseline_year_1_arm_1"& fold =="train")%>%
dplyr::select(-fold))
processed_test_baseline_scale_seperate_all <-processed_psychopathology_scale_seperate_features_only %>%
purrr::map(.,~filter(.,EVENTNAME=="baseline_year_1_arm_1"& fold =="test")%>%
dplyr::select(-fold))
processed_train_followup_scale_seperate_all <- processed_psychopathology_scale_seperate_features_only %>%
purrr::map(.,~filter(.,EVENTNAME=="2_year_follow_up_y_arm_1"& fold =="train")%>%
dplyr::select(-fold))
processed_test_followup_scale_seperate_all <- processed_psychopathology_scale_seperate_features_only %>%
purrr::map(.,~filter(.,EVENTNAME=="2_year_follow_up_y_arm_1"& fold =="test")%>%
dplyr::select(-fold))
psychopathology_baseline_train <- purrr::map2(.x =baseline_train_gfactor ,
.y=processed_train_baseline_scale_seperate_all,
~left_join(.x,.y, by = "SUBJECTKEY")%>% drop_na())
psychopathology_baseline_test <- purrr::map2(.x =baseline_test_gfactor ,.y=processed_test_baseline_scale_seperate_all,
~left_join(.x,.y, by ="SUBJECTKEY")%>% drop_na())
psychopathology_followup_train <- purrr::map2(.x =followup_train_gfactor ,
.y=processed_train_followup_scale_seperate_all,
~left_join(.x,.y, by = "SUBJECTKEY")%>% drop_na())
psychopathology_followup_test <- purrr::map2(.x =followup_test_gfactor ,
.y=processed_test_followup_scale_seperate_all,
~left_join(.x,.y, by = "SUBJECTKEY")%>% drop_na())
psychopathology_feature_names <- psychopathology_baseline_train[[1]] %>%
dplyr::select( -all_of(subj_info),-"gfactor")%>%
colnames()%>%
as.character()
psychopathology_baseline_train_select <- purrr::map(.x = psychopathology_baseline_train,
~dplyr::select(.x,all_of(psychopathology_feature_names),"gfactor"))
psychopathology_baseline_test_select <- purrr::map(.x = psychopathology_baseline_test,
~dplyr::select(.x,all_of(psychopathology_feature_names),"gfactor"))
psychopathology_followup_train_select <- purrr::map(.x = psychopathology_followup_train,
~dplyr::select(.x,all_of(psychopathology_feature_names),"gfactor"))
psychopathology_followup_test_select <- purrr::map(.x = psychopathology_followup_test ,
~dplyr::select(.x,all_of(psychopathology_feature_names),"gfactor"))
Baseline models and performance metrics.
### baseline
### setting up the recipe
features <- psychopathology_feature_names
psychopathology_baseline_recipe_list <- purrr::map(.x = psychopathology_baseline_train_select,
~recipe_prep(train_input=.x))
#recipe_input <- psychopathology_baseline_recipe_list[[1]]
### pls tuning
psychopathology_pls_fit_baseline <- psychopathology_baseline_recipe_list %>% purrr::map(.,~pls_tune(recipe_input = .))
psychopathology_pls_fit_wf <- purrr::map(psychopathology_pls_fit_baseline,"pls_final_wf")
### model final fit and prediction
psychopathology_pls_pred_baseline <- purrr::pmap(list(psychopathology_baseline_recipe_list,
psychopathology_pls_fit_wf,
psychopathology_baseline_test_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
psychopathology_pls_pred_baseline_results <- purrr::map(psychopathology_pls_pred_baseline,"model_predict")
psychopathology_pls_pred_baseline_train <- purrr::pmap(list(psychopathology_baseline_recipe_list,
psychopathology_pls_fit_wf,
psychopathology_baseline_train_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
psychopathology_pls_pred_baseline_results_train <- purrr::map(psychopathology_pls_pred_baseline_train,"model_predict")
### compute the performance metric
psychopathology_baseline_metric <- purrr::map2(.x=psychopathology_pls_pred_baseline_results,
.y=psychopathology_baseline_test,~metric_compute_site(data_input =.x ,
site_input = .y)) %>%
do.call(rbind,.)
psychopathology_baseline_metric%>%
kableExtra::kbl(caption = "metrics for all sites in baseline") %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.3658249 | 0.1303598 | 0.7421831 | 0.9241757 | site01 |
| 0.3313679 | 0.1016078 | 0.7432904 | 0.9476698 | site02 |
| 0.3381908 | 0.1101639 | 0.7402534 | 0.9425222 | site03 |
| 0.3678763 | 0.1333168 | 0.7312679 | 0.9303247 | site04 |
| 0.5597447 | 0.2956101 | 0.6585767 | 0.8381226 | site05 |
| 0.3410484 | 0.1077068 | 0.7405508 | 0.9443613 | site06 |
| 0.4784056 | 0.2254262 | 0.6850812 | 0.8787395 | site07 |
| 0.4093874 | 0.1668964 | 0.7210578 | 0.9113978 | site08 |
| 0.3879802 | 0.1504608 | 0.7294099 | 0.9205311 | site09 |
| 0.4028957 | 0.1622433 | 0.7113103 | 0.9134528 | site10 |
| 0.4258926 | 0.1813245 | 0.7134233 | 0.9048017 | site11 |
| 0.5021376 | 0.2485132 | 0.6750443 | 0.8668803 | site12 |
| 0.4243384 | 0.1798012 | 0.7145275 | 0.9050138 | site13 |
| 0.2501605 | 0.0387981 | 0.7721574 | 0.9795862 | site14 |
| 0.3671954 | 0.1313395 | 0.7533609 | 0.9303344 | site15 |
| 0.3353717 | 0.1049362 | 0.7334809 | 0.9455762 | site16 |
| 0.3828547 | 0.1446389 | 0.7209690 | 0.9240162 | site17 |
| 0.3260003 | 0.1001825 | 0.7566932 | 0.9473282 | site18 |
| 0.3988269 | 0.1574067 | 0.7273404 | 0.9175321 | site19 |
| 0.3984798 | 0.1576716 | 0.7191726 | 0.9171084 | site20 |
| 0.4396583 | 0.1928774 | 0.7099830 | 0.8969044 | site21 |
psychopathology_baseline_metric_avg <- average_metric_one_mod(metric_list =psychopathology_baseline_metric)
psychopathology_baseline_metric_avg_table <- psychopathology_baseline_metric_avg %>%
mutate_if(is.numeric, round, digits=3)%>%
mutate("correlation (sd)" = paste0(correlation," (",cor_sd,")"))%>%
mutate("tradrsq (sd)" = paste0(tradrsq," (",rsq_sd,")"))%>%
mutate("MAE (sd)" = paste0(MAE," (",mae_sd,")"))%>%
mutate("RMSE (sd)" = paste0(RMSE," (",rmse_sd,")"))%>%
select_if(is.character)
avg_table_var_names <- c("correlation (sd)", "tradrsq (sd)","MAE (sd)","RMSE (sd)" )
psychopathology_baseline_metric_avg_table%>%
dplyr::select(all_of(avg_table_var_names))%>%
kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in baseline")) %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.392 (0.068) | 0.153 (0.057) | 0.724 (0.027) | 0.918 (0.031) |
Modelling followup
### setting up the recipe
psychopathology_followup_recipe_list <- purrr::map(.x = psychopathology_followup_train_select,
~recipe_prep(train_input=.x))
#recipe_input <- psychopathology_baseline_recipe_list[[1]]
### pls tuning
psychopathology_pls_fit_followup <- psychopathology_followup_recipe_list %>%
purrr::map(.,~pls_tune(recipe_input = .))
psychopathology_pls_fit_wf_followup <- purrr::map(psychopathology_pls_fit_followup,"pls_final_wf")
### model final fit and prediction
psychopathology_pls_pred_followup <- purrr::pmap(list(psychopathology_followup_recipe_list,
psychopathology_pls_fit_wf_followup,
psychopathology_followup_test_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
psychopathology_pls_pred_followup_results <- purrr::map(psychopathology_pls_pred_followup,"model_predict")
psychopathology_pls_pred_followup_train <- purrr::pmap(list(psychopathology_followup_recipe_list,
psychopathology_pls_fit_wf_followup,
psychopathology_followup_train_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
psychopathology_pls_pred_followup_results_train <- purrr::map(psychopathology_pls_pred_followup_train,"model_predict")
### compute the performance metric
psychopathology_followup_metric <- purrr::map2(.x=psychopathology_pls_pred_followup_results,
.y=psychopathology_followup_test,~metric_compute_site(data_input =.x ,
site_input = .y)) %>%
do.call(rbind,.)
psychopathology_followup_metric%>%
kableExtra::kbl(caption = "metrics for all sites in followup") %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.4952109 | 0.2431941 | 0.6122293 | 0.7813798 | site01 |
| 0.3786713 | 0.1079337 | 0.7370283 | 0.9342754 | site02 |
| 0.4003672 | 0.1470680 | 0.7223435 | 0.8859100 | site03 |
| 0.4093904 | 0.1647772 | 0.7256146 | 0.9288682 | site04 |
| 0.5372687 | 0.2853286 | 0.6396871 | 0.7840006 | site05 |
| 0.2730276 | 0.0216919 | 0.6745511 | 0.8709119 | site06 |
| 0.4502930 | 0.1768879 | 0.6599253 | 0.8607248 | site07 |
| 0.3557054 | 0.1185730 | 0.6877080 | 0.8648319 | site08 |
| 0.3198527 | 0.0728102 | 0.6923593 | 0.8783524 | site09 |
| 0.4091455 | 0.1659586 | 0.6663097 | 0.8450050 | site10 |
| 0.3219722 | 0.0863595 | 0.6984157 | 0.8997283 | site11 |
| 0.5387924 | 0.2751461 | 0.6097954 | 0.7743932 | site12 |
| 0.3996873 | 0.1341746 | 0.6821421 | 0.8498319 | site13 |
| 0.2786360 | 0.0514307 | 0.7229119 | 0.9276650 | site14 |
| 0.4819168 | 0.1625940 | 0.7736475 | 0.9633101 | site15 |
| 0.3356247 | 0.0919820 | 0.7246159 | 0.9159794 | site16 |
| 0.2866875 | 0.0184466 | 0.7355750 | 0.9204807 | site17 |
| 0.4154152 | 0.1507645 | 0.7053084 | 0.8761722 | site18 |
| 0.3515408 | 0.1095605 | 0.7807932 | 0.9774203 | site19 |
| 0.4492067 | 0.2017771 | 0.7151134 | 0.9041720 | site20 |
| 0.4311515 | 0.1754779 | 0.6439819 | 0.8270305 | site21 |
psychopathology_followup_metric_avg <- average_metric_one_mod(metric_list =psychopathology_followup_metric)
psychopathology_followup_metric_avg_table <- psychopathology_followup_metric_avg %>%
mutate_if(is.numeric, round, digits=3)%>%
mutate("correlation (sd)" = paste0(correlation," (",cor_sd,")"))%>%
mutate("tradrsq (sd)" = paste0(tradrsq," (",rsq_sd,")"))%>%
mutate("MAE (sd)" = paste0(MAE," (",mae_sd,")"))%>%
mutate("RMSE (sd)" = paste0(RMSE," (",rmse_sd,")"))%>%
select_if(is.character)
avg_table_var_names <- c("correlation (sd)", "tradrsq (sd)","MAE (sd)","RMSE (sd)" )
psychopathology_followup_metric_avg_table%>%
dplyr::select(all_of(avg_table_var_names))%>%
kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in followup")) %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.396 (0.079) | 0.141 (0.073) | 0.696 (0.047) | 0.88 (0.056) |
Baseline
## get the model grid
psychopathology_pls_grid_baseline <- purrr::map(psychopathology_pls_fit_baseline,"pls_grid")
psychopathology_pls_param_baseline <- purrr::map(psychopathology_pls_fit_baseline,"best_pls_model")
factor_metric_plot <- function(grid_input, param_input){
selected_comp <- param_input$num_comp
comp_plot <- grid_input %>%
collect_metrics() %>%
ggplot(aes(num_comp, mean, col = .metric)) +
geom_point() +
geom_line() +
geom_vline(xintercept = selected_comp, size=1.5)+
scale_x_continuous(n.breaks = 26) +
labs(x = "Number of components",
y = "Indicator",
title = "Plot of RMSE vs number of components ",
subtitle = paste0("Optimal number of components is ", selected_comp)) +
facet_grid(.metric ~.) +
theme_few() +
theme(legend.position = "none")
return(comp_plot)
}
comp_metric_plot_baseline <- purrr::map2(.x = psychopathology_pls_grid_baseline,
.y = psychopathology_pls_param_baseline,
~factor_metric_plot(grid_input= .x ,
param_input = .y))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
comp_metric_plot_baseline
## $site01
##
## $site02
##
## $site03
##
## $site04
##
## $site05
##
## $site06
##
## $site07
##
## $site08
##
## $site09
##
## $site10
##
## $site11
##
## $site12
##
## $site13
##
## $site14
##
## $site15
##
## $site16
##
## $site17
##
## $site18
##
## $site19
##
## $site20
##
## $site21
Followup
## get the model grid
psychopathology_pls_grid_followup <- purrr::map(psychopathology_pls_fit_followup,"pls_grid")
psychopathology_pls_param_followup <- purrr::map(psychopathology_pls_fit_followup,"best_pls_model")
comp_metric_plot_followup <- purrr::map2(.x = psychopathology_pls_grid_followup,
.y = psychopathology_pls_param_followup,
~factor_metric_plot(grid_input= .x ,
param_input = .y))
comp_metric_plot_followup
## $site01
##
## $site02
##
## $site03
##
## $site04
##
## $site05
##
## $site06
##
## $site07
##
## $site08
##
## $site09
##
## $site10
##
## $site11
##
## $site12
##
## $site13
##
## $site14
##
## $site15
##
## $site16
##
## $site17
##
## $site18
##
## $site19
##
## $site20
##
## $site21
##baseline
Model across all sites. Combine the baseline train and baseline test of the first element of the list. It will cover all of the data.
data_all_site_baseline <- rbind(psychopathology_baseline_train_select[[1]],
psychopathology_baseline_test_select[[1]])
### data are scaled in recipe
all_data_recipe_baseline <- recipe_prep_scale(train_input=data_all_site_baseline)
## follow the function use a more parsimonious model
# Now find the least complex model that has no more than a 0.1% loss of RMSE:
all_data_fit_baseline <- pls_tune(recipe_input = all_data_recipe_baseline)
all_data_wf_baseline <- all_data_fit_baseline[["pls_final_wf"]]
all_data_final_fit_baseline <- all_data_wf_baseline%>%
parsnip::extract_spec_parsnip()%>%
parsnip::fit(data = data_all_site_baseline, formula= as.formula("gfactor~."))
tidy_all_data_final_fit_baseline <- all_data_final_fit_baseline%>%
tidy()
all_data_param_baseline <-all_data_fit_baseline[["best_pls_model"]][["num_comp"]]
### extract the variance explained by each component(read the paper)
var_explained <- all_data_final_fit_baseline[["fit"]][["prop_expl_var"]][["X"]]
### plotting feature importance based on the model with all the data
comp_idx_vec <- c(1:all_data_param_baseline)
tidy_all_data_final_fit_baseline <- tidy_all_data_final_fit_baseline %>%
rename(feature_names = term)
tidy_all_data_final_fit_baseline_with_name<- full_join(tidy_all_data_final_fit_baseline,
plotting_names, by = "feature_names")%>%
filter(feature_names != "Y", # outcome variable col name
)
tidy_all_data_final_fit_baseline_list <- purrr::map(comp_idx_vec,
~filter(tidy_all_data_final_fit_baseline_with_name,
component == .)%>%
dplyr::select(all_of(c("plotting_name","value"))))
### get the variable order from the first component:
tidy_all_data_final_fit_baseline_reordered <- tidy_all_data_final_fit_baseline_list[[1]] %>%
arrange(value)
tidy_all_data_final_fit_baseline_reordered<- tidy_all_data_final_fit_baseline_reordered$plotting_name
pls_vi_plot_with_label <- function(data_input=tidy_all_data_final_fit_baseline_list[[1]],
var_input = var_explained[1],
idx_input = comp_idx_vec[1],
reorder_name = tidy_all_data_final_fit_baseline_reordered){
### arrange the data with the character vector input
data_input <- data_input %>%
mutate(plotting_name = as.factor(plotting_name))%>%
mutate(plotting_name = factor(plotting_name,
levels =reorder_name))
### setting up the plot titles
range_value <- range(data_input$value)
var_title_long <- paste0("component ", idx_input," var explained ",round(var_input,3)*100,"%")
var_title_short <- paste0(round(var_input,3)*100,"%")
var_title_medium <- paste0("comp ", idx_input," \n ",round(var_input,3)*100,"%")
bar_plot <- ggplot(data_input, aes(x=.data[["value"]], y=plotting_name)) +
geom_bar(stat="identity")+
theme_classic() +
scale_x_continuous(limits = c(round(range_value[1],2)-0.05, round(range_value[2],2)+0.05),
breaks = c(round(range_value[1],2)-0.05,0, round(range_value[2],2)+0.05))+
labs(title = var_title_medium)+
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(size = 12,angle = 60,vjust = 0.5),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 12),
legend.text = element_blank(),
plot.title = element_text(size=15))
return(bar_plot)
}
## get the component one plot
comp_one_plot <- pls_vi_plot_with_label(data_input=tidy_all_data_final_fit_baseline_list[[1]],
var_input = var_explained[1],
idx_input = comp_idx_vec[1])
pls_vi_plot_no_label <- function(data_input=tidy_all_data_final_fit_baseline_list[[2]],
var_input = var_explained[2],
idx_input = comp_idx_vec[2],
reorder_name = tidy_all_data_final_fit_baseline_reordered){
### arrange the data with the character vector input
data_input <- data_input %>%
mutate(plotting_name = as.factor(plotting_name))%>%
mutate(plotting_name = factor(plotting_name,
levels =reorder_name))
### setting up the plot titles
range_value <- range(data_input$value)
var_title_long <- paste0("component ", idx_input," var explained ",round(var_input,3)*100,"%")
var_title_short <- paste0(round(var_input,3)*100,"%")
var_title_medium <- paste0("comp ", idx_input," \n ",round(var_input,3)*100,"%")
bar_plot <- ggplot(data_input, aes(x=.data[["value"]], y=plotting_name)) +
geom_bar(stat="identity")+
scale_x_continuous(limits = c(round(range_value[1],2)-0.05, round(range_value[2],2)+0.05),
breaks = c(round(range_value[1],2)-0.05,0, round(range_value[2],2)+0.05))+
theme_classic() +
labs(title = var_title_medium)+
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(size = 12,angle = 60,vjust = 0.5),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
legend.text = element_blank(),
plot.title = element_text(size=15),
axis.ticks = element_blank())
return(bar_plot)
}
comp_other_plots <- purrr::pmap(list(tidy_all_data_final_fit_baseline_list[2:all_data_param_baseline],
var_explained[2:all_data_param_baseline],
comp_idx_vec[2:all_data_param_baseline
]),~pls_vi_plot_no_label(data_input=..1,
var_input = ..2,
idx_input = ..3))
comp_other_plots_combined <- ggpubr::ggarrange(plotlist=comp_other_plots, nrow =1,ncol = length(var_explained)-1)
## combine the plots together with carefully specified ratios
comp_plots_all <- gridExtra::grid.arrange(comp_one_plot,comp_other_plots_combined,nrow = 1, ncol = 2, widths = c(3.3, 4))
comp_plots_all
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
The bar plot for correlations across all sites. Train and test fold are joined together.
corr_data_all <- rbind(psychopathology_baseline_train[[1]],psychopathology_baseline_test[[1]])
corr_all_features <- purrr::map(.x = features,~cor(corr_data_all[[.x]],corr_data_all[["gfactor"]]))%>%
do.call(rbind,.)%>% as.numeric()
corr_all_features_cor_test <- purrr::map(.x = features,
~cor.test(corr_data_all[[.x]],corr_data_all[["gfactor"]],method="pearson"))
corr_all_features_ci <- purrr::map(corr_all_features_cor_test,"conf.int")%>%
do.call(rbind,.)%>% tibble::as_tibble()%>%
rename(low=V1,upp=V2)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
corr_output_tibble <- tibble(feature_names = features,value = corr_all_features)
corr_output_tibble <- cbind(corr_output_tibble,corr_all_features_ci)
corr_baseline_all_sites_names <- full_join(corr_output_tibble,plotting_names, by = "feature_names")
corr_baseline_all_sites_names%>%
mutate(plotting_name = fct_reorder(plotting_name, value,.fun = "max"))%>%
ggplot(aes(x = plotting_name, y = value))+
geom_bar(stat = "identity",fill="gray30",alpha = 0.7)+
geom_errorbar( aes(x=plotting_name,
ymin=low,
ymax=upp),
width=0.4, colour="black", alpha=0.9, linewidth=1.3)+
coord_flip()+
theme_classic() +
labs(y =paste0( "Correlation ") , x = "") +
theme(axis.title.x= element_text(size = 20),
axis.title.y= element_text(size = 20),
axis.text.y = element_text(size = 15),
axis.text.x = element_text(size = 20))
The plot without y-axis labels. Used in the aligned plot.
## use the order of baseline feature importance to order the correlation plot
## The order is the same with the magnitude of the univariate correlations
corr_baseline_all_sites_names_for_all <-corr_baseline_all_sites_names%>%
mutate(plotting_name = as.factor(plotting_name))%>%
mutate(plotting_name = factor(plotting_name,
levels =tidy_all_data_final_fit_baseline_reordered))
corr_bar_plot_baseline <- corr_baseline_all_sites_names_for_all%>%
#mutate(plotting_name = fct_reorder(plotting_name, value,.fun = "max"))%>%
ggplot(aes(x = plotting_name, y = value))+
geom_bar(stat = "identity",fill="gray40",alpha = 0.7)+
geom_errorbar( aes(x=plotting_name,
ymin=low,
ymax=upp),
width=0.4, colour="black", alpha=0.9, linewidth=1.3)+
scale_y_continuous(limits = c(-0.25, 0.25),
breaks = c(-0.25,0, 0.25))+
coord_flip()+
theme_classic() +
labs(title = "Univariate \ncorrelations")+
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(size = 12,angle = 60,vjust = 0.5),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
legend.text = element_blank(),
plot.title = element_text(size=15),
axis.ticks = element_blank())
vi_pls_plot_baseline_all <-ggpubr::ggarrange(comp_plots_all,corr_bar_plot_baseline,nrow = 1, ncol = 2, widths = c(8, 1.4))
vi_pls_plot_baseline_all
Use the same codes as baseline.
data_all_site_followup <- rbind(psychopathology_followup_train_select[[1]],
psychopathology_followup_test_select[[1]])
all_data_recipe_followup <- recipe_prep_scale(train_input=data_all_site_followup)
## follow the function use a more parsimonious model
## cut at the number of component that does not reduce 0.1% of the RMSE
all_data_fit_followup <- pls_tune(recipe_input = all_data_recipe_followup)
all_data_wf_followup <- all_data_fit_followup[["pls_final_wf"]]
all_data_final_fit_followup <- all_data_wf_followup%>%
parsnip::extract_spec_parsnip()%>%
parsnip::fit(data = data_all_site_followup, formula= as.formula("gfactor~."))
tidy_all_data_final_fit_followup <- all_data_final_fit_followup%>%
tidy()
all_data_param_followup <-all_data_fit_followup[["best_pls_model"]][["num_comp"]]
### extract the variance explained by each component
var_explained_followup <- all_data_final_fit_followup[["fit"]][["prop_expl_var"]][["X"]]
### plotting feature importance based on the model with all the data
comp_idx_vec_followup <- c(1:all_data_param_followup)
tidy_all_data_final_fit_followup <- tidy_all_data_final_fit_followup %>%
rename(feature_names = term)
tidy_all_data_final_fit_followup_with_name<- full_join(tidy_all_data_final_fit_followup,
plotting_names, by = "feature_names")%>%
filter(feature_names != "Y", # outcome variable col name
)
tidy_all_data_final_fit_followup_list <- purrr::map(comp_idx_vec_followup,
~filter(tidy_all_data_final_fit_followup_with_name,
component == .)%>%
dplyr::select(all_of(c("plotting_name","value"))))
### get the variable order from the first component:
tidy_all_data_final_fit_followup_reordered <- tidy_all_data_final_fit_followup_list[[1]] %>%
arrange(value)
tidy_all_data_final_fit_followup_reordered<- tidy_all_data_final_fit_followup_reordered$plotting_name
comp_one_plot_followup <- pls_vi_plot_with_label(data_input=tidy_all_data_final_fit_followup_list[[1]],
var_input = var_explained_followup[1],
idx_input = comp_idx_vec_followup[1],
reorder_name=tidy_all_data_final_fit_followup_reordered)
comp_other_plots_followup <- purrr::pmap(list(tidy_all_data_final_fit_followup_list[2:all_data_param_followup],
var_explained_followup[2:all_data_param_followup],
comp_idx_vec_followup[2:all_data_param_followup]
),~pls_vi_plot_no_label(data_input=..1,
var_input = ..2,
idx_input = ..3,
reorder_name=tidy_all_data_final_fit_followup_reordered))
comp_other_plots_combined_followup <- ggpubr::ggarrange(plotlist=comp_other_plots_followup,
nrow =1,ncol = length(var_explained_followup)-1)
comp_plots_all_followup <- gridExtra::grid.arrange(comp_one_plot_followup,
comp_other_plots_combined_followup,
nrow = 1, ncol = 2, widths = c(3.3, 4))
comp_plots_all_followup
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
The bar plot for correlations across all sites. Train and test fold are joined together.
corr_data_all_followup <- rbind(psychopathology_followup_train[[1]],psychopathology_followup_test[[1]])
corr_all_features_followup <- purrr::map(.x = features,~cor(corr_data_all_followup[[.x]],
corr_data_all_followup[["gfactor"]]))%>%
do.call(rbind,.)%>% as.numeric()
corr_all_features_cor_test_followup <- purrr::map(.x = features,
~cor.test(corr_data_all_followup[[.x]],
corr_data_all_followup[["gfactor"]],method="pearson"))
corr_all_features_ci_followup <- purrr::map(corr_all_features_cor_test_followup,"conf.int")%>%
do.call(rbind,.)%>% tibble::as_tibble()%>%
rename(low=V1,upp=V2)
corr_output_tibble_followup <- tibble(feature_names = features,value = corr_all_features_followup)
corr_output_tibble_followup <- cbind(corr_output_tibble_followup,corr_all_features_ci_followup)
corr_followup_all_sites_names <- full_join(corr_output_tibble_followup,
plotting_names, by = "feature_names")
corr_followup_all_sites_names%>%
mutate(plotting_name = fct_reorder(plotting_name, value,.fun = "max"))%>%
ggplot(aes(x = plotting_name, y = value))+
geom_bar(stat = "identity",fill="gray30",alpha = 0.7)+
geom_errorbar( aes(x=plotting_name,
ymin=low,
ymax=upp),
width=0.4, colour="black", alpha=0.9, linewidth=1.3)+
coord_flip()+
theme_classic() +
labs(y =paste0( "Correlation ") , x = "") +
theme(axis.title.x= element_text(size = 20),
axis.title.y= element_text(size = 20),
axis.text.y = element_text(size = 15),
axis.text.x = element_text(size = 20))
corr_followup_all_sites_names_for_all <-corr_followup_all_sites_names%>%
mutate(plotting_name = as.factor(plotting_name))%>%
mutate(plotting_name = factor(plotting_name,
levels =tidy_all_data_final_fit_followup_reordered))
corr_bar_plot_followup <- corr_followup_all_sites_names_for_all%>%
#mutate(plotting_name = fct_reorder(plotting_name, value,.fun = "max"))%>%
ggplot(aes(x = plotting_name, y = value))+
geom_bar(stat = "identity",fill="gray40",alpha = 0.7)+
geom_errorbar( aes(x=plotting_name,
ymin=low,
ymax=upp),
width=0.4, colour="black", alpha=0.9, linewidth=1.3)+
scale_y_continuous(limits = c(-0.25, 0.25),
breaks = c(-0.25,0, 0.25))+##use this because all the plots should have the same height
coord_flip()+
theme_classic() +
labs(title = "Univariate \ncorrelations")+
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(size = 12,angle = 60,vjust = 0.5),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
legend.text = element_blank(),
plot.title = element_text(size=15),
axis.ticks = element_blank())
vi_pls_plot_followup_all <-ggpubr::ggarrange(comp_plots_all_followup,
corr_bar_plot_followup,
nrow = 1, ncol = 2, widths = c(8, 1.4))
vi_pls_plot_followup_all
Process the feature importance plots: align the baseline and followup plot together.
vi_pls_plot_baseline_label <- vi_pls_plot_baseline_all %>%
ggpubr::annotate_figure(top = ggpubr::text_grob("Baseline",size=20,hjust=2.5))
vi_pls_plot_followup_label <- vi_pls_plot_followup_all %>%
ggpubr::annotate_figure(top = ggpubr::text_grob("Followup",size=20,hjust=2.5))
vi_pls_plot_label <- ggpubr::ggarrange(vi_pls_plot_baseline_label,vi_pls_plot_followup_label,nrow = 2)
title_vi_pls_plot <- ggpubr::annotate_figure(vi_pls_plot_label,
top = ggpubr::text_grob("Feature importance of Partial Least Squares Regressions \nPredicting Cognitive Abilities from Mental Health Variables",size=25, face = "bold"))
title_vi_pls_plot
output_list <- list(baseline_train_pred = psychopathology_pls_pred_baseline_results_train,
baseline_test_pred = psychopathology_pls_pred_baseline_results,
baseline_train_data = psychopathology_baseline_train,
baseline_test_data = psychopathology_baseline_test,
followup_train_pred = psychopathology_pls_pred_followup_results_train,
followup_test_pred = psychopathology_pls_pred_followup_results,
followup_train_data =psychopathology_followup_train ,
followup_test_data = psychopathology_followup_test)
saveRDS(output_list,paste0(scriptfold,'Common_psy_gene_brain_all/saved_outputs/psychopathology_pls_pred_2.0', '.RData'))
parents_baseline_train_select <- purrr::map(.x = psychopathology_baseline_train_select,
~dplyr::select(.,all_of(c(Parent_Mental_Health_names,"gfactor"))))
parents_baseline_test_select <- purrr::map(.x = psychopathology_baseline_test_select,
~dplyr::select(.,all_of(c(Parent_Mental_Health_names,"gfactor"))))
parents_followup_train_select <- purrr::map(.x = psychopathology_followup_train_select,
~dplyr::select(.,all_of(c(Parent_Mental_Health_names,"gfactor"))))
parents_followup_test_select <- purrr::map(.x = psychopathology_followup_test_select,
~dplyr::select(.,all_of(c(Parent_Mental_Health_names,"gfactor"))))
### fit the enet model
### baseline
parents_baseline_recipe_list <- purrr::map(.x = parents_baseline_train_select,
~recipe_prep(train_input=.x,features_input = Parent_Mental_Health_names))
parents_pls_fit_baseline <- parents_baseline_recipe_list %>%
purrr::map(.,~pls_tune(recipe_input = .,feature_input = Parent_Mental_Health_names))
parents_pls_fit_wf <- purrr::map(parents_pls_fit_baseline,"pls_final_wf")
parents_pls_pred_baseline <- purrr::pmap(list(parents_baseline_recipe_list,
parents_pls_fit_wf,
parents_baseline_test_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
parents_pls_pred_baseline_results <- purrr::map(parents_pls_pred_baseline,"model_predict")
parents_pls_pred_baseline_train <- purrr::pmap(list(parents_baseline_recipe_list,
parents_pls_fit_wf,
parents_baseline_train_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
parents_pls_pred_baseline_results_train <- purrr::map(parents_pls_pred_baseline_train,"model_predict")
parents_baseline_metric <- purrr::map2(.x=parents_pls_pred_baseline_results,
.y=psychopathology_baseline_test,
~metric_compute_site(data_input =.x ,
site_input = .y)) %>%
do.call(rbind,.)
parents_baseline_metric%>%
kableExtra::kbl(caption = "metrics for all sites in baseline") %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.1774546 | 0.0253694 | 0.7794813 | 0.9783738 | site01 |
| 0.1605865 | 0.0159658 | 0.7810571 | 0.9918116 | site02 |
| 0.1342591 | 0.0046575 | 0.7860262 | 0.9968340 | site03 |
| 0.1850618 | 0.0300186 | 0.7793983 | 0.9842061 | site04 |
| 0.3572455 | 0.1170182 | 0.7358356 | 0.9383759 | site05 |
| 0.2086662 | 0.0418865 | 0.7701106 | 0.9785722 | site06 |
| 0.3509817 | 0.1143150 | 0.7326500 | 0.9396550 | site07 |
| 0.1497906 | 0.0145928 | 0.7709818 | 0.9912116 | site08 |
| 0.2295732 | 0.0526340 | 0.7773551 | 0.9720882 | site09 |
| 0.2113015 | 0.0439401 | 0.7643733 | 0.9758199 | site10 |
| 0.2590552 | 0.0670191 | 0.7535869 | 0.9659038 | site11 |
| 0.4167393 | 0.1454727 | 0.7266680 | 0.9244031 | site12 |
| 0.2205384 | 0.0468376 | 0.7632017 | 0.9756164 | site13 |
| 0.0938130 | -0.0183780 | 0.8103373 | 1.0083002 | site14 |
| 0.1932776 | 0.0363187 | 0.7962477 | 0.9798977 | site15 |
| 0.1110058 | -0.0098377 | 0.7819172 | 1.0043736 | site16 |
| 0.1374260 | 0.0018720 | 0.7806215 | 0.9981549 | site17 |
| 0.1077463 | -0.0099094 | 0.8010771 | 1.0036088 | site18 |
| 0.2134093 | 0.0417061 | 0.7770648 | 0.9785018 | site19 |
| 0.2144236 | 0.0435759 | 0.7629036 | 0.9772489 | site20 |
| 0.3598255 | 0.1188617 | 0.7404714 | 0.9371271 | site21 |
parents_baseline_metric_avg <- average_metric_one_mod(metric_list =parents_baseline_metric)
parents_baseline_metric_avg_table <- parents_baseline_metric_avg %>%
mutate_if(is.numeric, round, digits=3)%>%
mutate("correlation (sd)" = paste0(correlation," (",cor_sd,")"))%>%
mutate("tradrsq (sd)" = paste0(tradrsq," (",rsq_sd,")"))%>%
mutate("MAE (sd)" = paste0(MAE," (",mae_sd,")"))%>%
mutate("RMSE (sd)" = paste0(RMSE," (",rmse_sd,")"))%>%
select_if(is.character)
parents_baseline_metric_avg_table%>%
dplyr::select(all_of(avg_table_var_names))%>%
kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in baseline")) %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.214 (0.09) | 0.044 (0.046) | 0.77 (0.022) | 0.976 (0.024) |
### fit the enet model
### followup
parents_followup_recipe_list <- purrr::map(.x=parents_followup_train_select,
~recipe_prep(train_input=.x,
features_input = Parent_Mental_Health_names))
parents_pls_fit_followup <- parents_followup_recipe_list %>%
purrr::map(.,~pls_tune(recipe_input = .,feature_input = Parent_Mental_Health_names))
parents_pls_fit_wf_followup <- purrr::map(parents_pls_fit_followup,"pls_final_wf")
parents_pls_pred_followup <- purrr::pmap(list(parents_followup_recipe_list,
parents_pls_fit_wf_followup,
parents_followup_test_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
parents_pls_pred_followup_results <- purrr::map(parents_pls_pred_followup,"model_predict")
parents_pls_pred_followup_train <- purrr::pmap(list(parents_followup_recipe_list,
parents_pls_fit_wf_followup,
parents_followup_train_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
parents_pls_pred_followup_results_train <- purrr::map(parents_pls_pred_followup_train,"model_predict")
parents_followup_metric <- purrr::map2(.x=parents_pls_pred_followup_results,
.y=psychopathology_followup_test,~metric_compute_site(data_input =.x ,
site_input = .y)) %>%
do.call(rbind,.)
parents_followup_metric%>%
kableExtra::kbl(caption = "metrics for all sites in followup") %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.3141114 | 0.0985489 | 0.6736215 | 0.8527879 | site01 |
| 0.2153789 | -0.0047732 | 0.7839466 | 0.9915403 | site02 |
| 0.1962820 | 0.0321684 | 0.7476408 | 0.9436964 | site03 |
| 0.1833391 | 0.0243764 | 0.7905057 | 1.0039083 | site04 |
| 0.3495083 | 0.1115581 | 0.7121534 | 0.8741334 | site05 |
| 0.1685226 | 0.0214813 | 0.6717714 | 0.8710057 | site06 |
| 0.3597345 | 0.0829015 | 0.7187615 | 0.9085374 | site07 |
| 0.1383235 | -0.0080121 | 0.7275678 | 0.9248502 | site08 |
| 0.1717375 | 0.0105191 | 0.7226130 | 0.9073779 | site09 |
| 0.2253441 | 0.0497414 | 0.7156415 | 0.9019582 | site10 |
| 0.1835246 | 0.0178754 | 0.7278881 | 0.9328397 | site11 |
| 0.3436594 | 0.1060749 | 0.6812224 | 0.8599770 | site12 |
| 0.1190815 | -0.0287100 | 0.7327159 | 0.9263271 | site13 |
| 0.0998072 | -0.0230309 | 0.7556889 | 0.9633875 | site14 |
| 0.3007732 | 0.0269021 | 0.8347918 | 1.0384278 | site15 |
| 0.1267760 | -0.0187616 | 0.7548126 | 0.9702302 | site16 |
| 0.1355823 | -0.0420926 | 0.7408100 | 0.9484422 | site17 |
| 0.0752281 | -0.0583869 | 0.7657335 | 0.9781323 | site18 |
| 0.1899620 | 0.0229174 | 0.8235628 | 1.0238699 | site19 |
| 0.2462298 | 0.0589246 | 0.7773010 | 0.9817506 | site20 |
| 0.2910651 | 0.0802868 | 0.6916655 | 0.8734671 | site21 |
parents_followup_metric_avg <- average_metric_one_mod(metric_list =parents_followup_metric)
parents_followup_metric_avg_table <- parents_followup_metric_avg %>%
mutate_if(is.numeric, round, digits=3)%>%
mutate("correlation (sd)" = paste0(correlation," (",cor_sd,")"))%>%
mutate("tradrsq (sd)" = paste0(tradrsq," (",rsq_sd,")"))%>%
mutate("MAE (sd)" = paste0(MAE," (",mae_sd,")"))%>%
mutate("RMSE (sd)" = paste0(RMSE," (",rmse_sd,")"))%>%
select_if(is.character)
parents_followup_metric_avg_table%>%
dplyr::select(all_of(avg_table_var_names))%>%
kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in followup")) %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.211 (0.086) | 0.027 (0.049) | 0.74 (0.045) | 0.937 (0.055) |
CBCL_baseline_train_select <- purrr::map(.x = psychopathology_baseline_train_select,
~dplyr::select(.,all_of(c(CBCL_sum_scores_names,"gfactor"))))
CBCL_baseline_test_select <- purrr::map(.x = psychopathology_baseline_test_select,
~dplyr::select(.,all_of(c(CBCL_sum_scores_names,"gfactor"))))
CBCL_followup_train_select <- purrr::map(.x = psychopathology_followup_train_select,
~dplyr::select(.,all_of(c(CBCL_sum_scores_names,"gfactor"))))
CBCL_followup_test_select <- purrr::map(.x = psychopathology_followup_test_select,
~dplyr::select(.,all_of(c(CBCL_sum_scores_names,"gfactor"))))
### fit the enet model
### baseline
CBCL_baseline_recipe_list <- purrr::map(.x = CBCL_baseline_train_select,
~recipe_prep(train_input=.x,features_input = CBCL_sum_scores_names))
CBCL_pls_fit_baseline <- CBCL_baseline_recipe_list%>%
purrr::map(.,~pls_tune(recipe_input = .,feature_input = CBCL_sum_scores_names))
CBCL_pls_fit_wf <- purrr::map(CBCL_pls_fit_baseline,"pls_final_wf")
CBCL_pls_pred_baseline <- purrr::pmap(list(CBCL_baseline_recipe_list,
CBCL_pls_fit_wf,
CBCL_baseline_test_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
CBCL_pls_pred_baseline_results <- purrr::map(CBCL_pls_pred_baseline,"model_predict")
CBCL_pls_pred_baseline_train <- purrr::pmap(list(CBCL_baseline_recipe_list,
CBCL_pls_fit_wf,
CBCL_baseline_train_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
CBCL_pls_pred_baseline_results_train <- purrr::map(CBCL_pls_pred_baseline_train,"model_predict")
CBCL_baseline_metric <- purrr::map2(.x=CBCL_pls_pred_baseline_results,
.y=psychopathology_baseline_test,~metric_compute_site(data_input =.x ,
site_input = .y)) %>%
do.call(rbind,.)
CBCL_baseline_metric%>%
kableExtra::kbl(caption = "metrics for all sites in baseline") %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.2852654 | 0.0806783 | 0.7614239 | 0.9502077 | site01 |
| 0.1638130 | 0.0118078 | 0.7718336 | 0.9939048 | site02 |
| 0.3193542 | 0.1007515 | 0.7519156 | 0.9474939 | site03 |
| 0.2646696 | 0.0698236 | 0.7538343 | 0.9638002 | site04 |
| 0.3042560 | 0.0924329 | 0.7340396 | 0.9513501 | site05 |
| 0.2205012 | 0.0391954 | 0.7677496 | 0.9799455 | site06 |
| 0.2934250 | 0.0857120 | 0.7520055 | 0.9547073 | site07 |
| 0.3128640 | 0.0971566 | 0.7540734 | 0.9487783 | site08 |
| 0.2964436 | 0.0876855 | 0.7597014 | 0.9539356 | site09 |
| 0.2962309 | 0.0877278 | 0.7490031 | 0.9532117 | site10 |
| 0.2814532 | 0.0791531 | 0.7558358 | 0.9596022 | site11 |
| 0.2753091 | 0.0748223 | 0.7548884 | 0.9618581 | site12 |
| 0.3503260 | 0.1182709 | 0.7390113 | 0.9383465 | site13 |
| 0.1560931 | 0.0054219 | 0.7863803 | 0.9964483 | site14 |
| 0.2793443 | 0.0780096 | 0.7839301 | 0.9584671 | site15 |
| 0.2961880 | 0.0877240 | 0.7375309 | 0.9546246 | site16 |
| 0.2985057 | 0.0891046 | 0.7494149 | 0.9535403 | site17 |
| 0.2807598 | 0.0788090 | 0.7666843 | 0.9585132 | site18 |
| 0.2677343 | 0.0701404 | 0.7714832 | 0.9638755 | site19 |
| 0.2638427 | 0.0675426 | 0.7496668 | 0.9649270 | site20 |
| 0.2162432 | 0.0442271 | 0.7650651 | 0.9760089 | site21 |
CBCL_baseline_metric_avg <- average_metric_one_mod(metric_list =CBCL_baseline_metric)
CBCL_baseline_metric_avg_table <- CBCL_baseline_metric_avg %>%
mutate_if(is.numeric, round, digits=3)%>%
mutate("correlation (sd)" = paste0(correlation," (",cor_sd,")"))%>%
mutate("tradrsq (sd)" = paste0(tradrsq," (",rsq_sd,")"))%>%
mutate("MAE (sd)" = paste0(MAE," (",mae_sd,")"))%>%
mutate("RMSE (sd)" = paste0(RMSE," (",rmse_sd,")"))%>%
select_if(is.character)
CBCL_baseline_metric_avg_table%>%
dplyr::select(all_of(avg_table_var_names))%>%
kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in baseline")) %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.273 (0.048) | 0.074 (0.028) | 0.758 (0.014) | 0.961 (0.015) |
### fit the enet model
### followup
CBCL_followup_recipe_list <- purrr::map(.x=CBCL_followup_train_select,
~recipe_prep(train_input=.x,features_input = CBCL_sum_scores_names))
CBCL_pls_fit_followup <- CBCL_followup_recipe_list %>%
purrr::map(.,~pls_tune(recipe_input = .,feature_input = CBCL_sum_scores_names))
CBCL_pls_fit_wf_followup <- purrr::map(CBCL_pls_fit_followup,"pls_final_wf")
CBCL_pls_pred_followup <- purrr::pmap(list(CBCL_followup_recipe_list,
CBCL_pls_fit_wf_followup,
CBCL_followup_test_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
CBCL_pls_pred_followup_results <- purrr::map(CBCL_pls_pred_followup,"model_predict")
CBCL_pls_pred_followup_train <- purrr::pmap(list(CBCL_followup_recipe_list,
CBCL_pls_fit_wf_followup,
CBCL_followup_train_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
CBCL_pls_pred_followup_results_train <-purrr::map(CBCL_pls_pred_followup_train,"model_predict")
CBCL_followup_metric <- purrr::map2(.x=CBCL_pls_pred_followup_results,
.y=psychopathology_followup_test,~metric_compute_site(data_input =.x ,
site_input = .y)) %>%
do.call(rbind,.)
CBCL_followup_metric%>%
kableExtra::kbl(caption = "metrics for all sites in followup") %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.2233917 | 0.0431689 | 0.7033007 | 0.8785927 | site01 |
| 0.2118718 | -0.0088482 | 0.7889409 | 0.9935490 | site02 |
| 0.2779015 | 0.0708403 | 0.7435611 | 0.9246504 | site03 |
| 0.2043701 | 0.0337593 | 0.7818379 | 0.9990692 | site04 |
| 0.3408640 | 0.1131536 | 0.7123041 | 0.8733482 | site05 |
| 0.1702036 | 0.0008550 | 0.6846625 | 0.8801378 | site06 |
| 0.2353501 | 0.0223710 | 0.7066789 | 0.9380410 | site07 |
| 0.3322801 | 0.0985400 | 0.6893517 | 0.8746046 | site08 |
| 0.1707499 | 0.0182320 | 0.7281113 | 0.9038345 | site09 |
| 0.2121533 | 0.0390521 | 0.7139485 | 0.9070170 | site10 |
| 0.3158694 | 0.0789481 | 0.7036494 | 0.9033702 | site11 |
| 0.2802729 | 0.0782401 | 0.6911336 | 0.8732632 | site12 |
| 0.2748151 | 0.0589357 | 0.7091351 | 0.8859873 | site13 |
| 0.1695873 | 0.0137414 | 0.7494300 | 0.9459149 | site14 |
| 0.2791967 | -0.0022583 | 0.8464142 | 1.0538720 | site15 |
| 0.2262949 | 0.0361840 | 0.7450453 | 0.9437035 | site16 |
| 0.2460411 | 0.0128788 | 0.7226655 | 0.9230877 | site17 |
| 0.2726952 | 0.0551548 | 0.7398666 | 0.9241782 | site18 |
| 0.1316393 | 0.0043793 | 0.8389213 | 1.0335372 | site19 |
| 0.2577099 | 0.0654529 | 0.7691142 | 0.9783394 | site20 |
| 0.2315294 | 0.0518044 | 0.6934370 | 0.8868891 | site21 |
CBCL_followup_metric_avg <- average_metric_one_mod(metric_list =CBCL_followup_metric)
CBCL_followup_metric_avg_table <- CBCL_followup_metric_avg %>%
mutate_if(is.numeric, round, digits=3)%>%
mutate("correlation (sd)" = paste0(correlation," (",cor_sd,")"))%>%
mutate("tradrsq (sd)" = paste0(tradrsq," (",rsq_sd,")"))%>%
mutate("MAE (sd)" = paste0(MAE," (",mae_sd,")"))%>%
mutate("RMSE (sd)" = paste0(RMSE," (",rmse_sd,")"))%>%
select_if(is.character)
CBCL_followup_metric_avg_table%>%
dplyr::select(all_of(avg_table_var_names))%>%
kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in followup")) %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.241 (0.055) | 0.042 (0.034) | 0.736 (0.046) | 0.93 (0.054) |
child_baseline_train_select <- purrr::map(.x = psychopathology_baseline_train_select,
~dplyr::select(.,all_of(c( Child_Personality_names,"gfactor"))))
child_baseline_test_select <- purrr::map(.x = psychopathology_baseline_test_select,
~dplyr::select(.,all_of(c( Child_Personality_names,"gfactor"))))
child_followup_train_select <- purrr::map(.x = psychopathology_followup_train_select,
~dplyr::select(.,all_of(c( Child_Personality_names,"gfactor"))))
child_followup_test_select <- purrr::map(.x = psychopathology_followup_test_select,
~dplyr::select(.,all_of(c( Child_Personality_names,"gfactor"))))
### fit the enet model
### baseline
child_baseline_recipe_list <- purrr::map(.x = child_baseline_train_select,
~recipe_prep(train_input=.x,features_input = Child_Personality_names))
child_pls_fit_baseline <- child_baseline_recipe_list %>%
purrr::map(.,~pls_tune(recipe_input = .,feature_input = Child_Personality_names))
child_pls_fit_wf <- purrr::map(child_pls_fit_baseline,"pls_final_wf")
child_pls_pred_baseline <- purrr::pmap(list(child_baseline_recipe_list,
child_pls_fit_wf,
child_baseline_test_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
child_pls_pred_baseline_results <- purrr::map(child_pls_pred_baseline,"model_predict")
child_pls_pred_baseline_train <- pmap(list(child_baseline_recipe_list,
child_pls_fit_wf,
child_baseline_train_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
child_pls_pred_baseline_results_train <- purrr::map(child_pls_pred_baseline_train,"model_predict")
child_baseline_metric <- map2(.x=child_pls_pred_baseline_results,
.y=psychopathology_baseline_test,~metric_compute_site(data_input =.x ,
site_input = .y)) %>%
do.call(rbind,.)
child_baseline_metric%>%
kableExtra::kbl(caption = "metrics for all sites in baseline") %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.2301139 | 0.0503490 | 0.7780712 | 0.9657546 | site01 |
| 0.2874749 | 0.0824662 | 0.7419444 | 0.9577124 | site02 |
| 0.1467884 | 0.0010057 | 0.7965594 | 0.9986610 | site03 |
| 0.2575813 | 0.0652398 | 0.7581326 | 0.9661721 | site04 |
| 0.4070034 | 0.1547431 | 0.7202570 | 0.9181113 | site05 |
| 0.2398759 | 0.0539322 | 0.7639640 | 0.9724013 | site06 |
| 0.3167741 | 0.0995826 | 0.7478518 | 0.9474378 | site07 |
| 0.3068713 | 0.0939157 | 0.7518646 | 0.9504796 | site08 |
| 0.2035175 | 0.0353097 | 0.7944463 | 0.9809361 | site09 |
| 0.2422655 | 0.0566274 | 0.7536802 | 0.9693236 | site10 |
| 0.3332697 | 0.1087794 | 0.7426469 | 0.9440394 | site11 |
| 0.3238142 | 0.1034506 | 0.7446165 | 0.9468595 | site12 |
| 0.2435109 | 0.0566474 | 0.7666898 | 0.9705830 | site13 |
| 0.2155426 | 0.0407826 | 0.7726659 | 0.9785745 | site14 |
| 0.2746212 | 0.0750269 | 0.7840942 | 0.9600162 | site15 |
| 0.2220486 | 0.0432180 | 0.7553823 | 0.9776334 | site16 |
| 0.3363135 | 0.1113770 | 0.7372554 | 0.9418107 | site17 |
| 0.2264853 | 0.0484188 | 0.7581694 | 0.9741956 | site18 |
| 0.2572657 | 0.0646609 | 0.7638518 | 0.9667113 | site19 |
| 0.2923401 | 0.0852555 | 0.7549576 | 0.9557182 | site20 |
| 0.2689318 | 0.0721260 | 0.7410211 | 0.9616586 | site21 |
child_baseline_metric_avg <- average_metric_one_mod(metric_list =child_baseline_metric)
child_baseline_metric_avg_table <- child_baseline_metric_avg %>%
mutate_if(is.numeric, round, digits=3)%>%
mutate("correlation (sd)" = paste0(correlation," (",cor_sd,")"))%>%
mutate("tradrsq (sd)" = paste0(tradrsq," (",rsq_sd,")"))%>%
mutate("MAE (sd)" = paste0(MAE," (",mae_sd,")"))%>%
mutate("RMSE (sd)" = paste0(RMSE," (",rmse_sd,")"))%>%
select_if(is.character)
child_baseline_metric_avg_table%>%
dplyr::select(all_of(avg_table_var_names))%>%
kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in baseline")) %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.268 (0.057) | 0.072 (0.033) | 0.758 (0.019) | 0.962 (0.017) |
### fit the enet model
### followup
child_followup_recipe_list <- purrr::map(.x=child_followup_train_select,~recipe_prep(train_input=.x,
features_input = Child_Personality_names))
child_pls_fit_followup <- child_followup_recipe_list %>%
purrr::map(.,~pls_tune(recipe_input = .,feature_input = Child_Personality_names))
child_pls_fit_wf_followup <- purrr::map(child_pls_fit_followup,"pls_final_wf")
child_pls_pred_followup <- pmap(list(child_followup_recipe_list,
child_pls_fit_wf_followup,
child_followup_test_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
child_pls_pred_followup_results <- purrr::map(child_pls_pred_followup,"model_predict")
child_pls_pred_followup_train <- pmap(list(child_followup_recipe_list,
child_pls_fit_wf_followup,
child_followup_train_select),~
model_final_fit(recipe_input = ..1,
wf_input = ..2,
test_data = ..3))
child_pls_pred_followup_results_train <- purrr::map(child_pls_pred_followup_train,"model_predict")
child_followup_metric <- map2(.x=child_pls_pred_followup_results,
.y=psychopathology_followup_test,~metric_compute_site(data_input =.x ,
site_input = .y)) %>%
do.call(rbind,.)
child_followup_metric%>%
kableExtra::kbl(caption = "metrics for all sites in followup") %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.4218630 | 0.1753951 | 0.6411196 | 0.8156294 | site01 |
| 0.2920636 | 0.0567293 | 0.7565497 | 0.9607149 | site02 |
| 0.2800944 | 0.0558003 | 0.7632825 | 0.9321039 | site03 |
| 0.3432039 | 0.1113752 | 0.7532259 | 0.9581028 | site04 |
| 0.4224944 | 0.1724779 | 0.6842332 | 0.8436319 | site05 |
| 0.2164499 | 0.0087977 | 0.6891667 | 0.8766325 | site06 |
| 0.2967338 | 0.0701306 | 0.7033264 | 0.9148413 | site07 |
| 0.2359942 | 0.0453781 | 0.7032737 | 0.9000243 | site08 |
| 0.2907953 | 0.0798290 | 0.6962024 | 0.8750216 | site09 |
| 0.2937269 | 0.0821288 | 0.6872819 | 0.8864543 | site10 |
| 0.1729606 | -0.0149503 | 0.7340666 | 0.9483008 | site11 |
| 0.4618552 | 0.1990698 | 0.6385183 | 0.8140173 | site12 |
| 0.3227122 | 0.0854849 | 0.7062496 | 0.8734003 | site13 |
| 0.2556427 | 0.0524753 | 0.7229956 | 0.9271541 | site14 |
| 0.3416293 | 0.0520811 | 0.8449642 | 1.0249051 | site15 |
| 0.2732442 | 0.0568895 | 0.7437904 | 0.9335117 | site16 |
| 0.1991069 | -0.0201470 | 0.7311221 | 0.9384023 | site17 |
| 0.3858643 | 0.1320589 | 0.6989886 | 0.8857691 | site18 |
| 0.3237461 | 0.1027530 | 0.7876615 | 0.9811494 | site19 |
| 0.3561586 | 0.1257222 | 0.7484811 | 0.9462670 | site20 |
| 0.3074633 | 0.0800055 | 0.6752093 | 0.8736007 | site21 |
child_followup_metric_avg <- average_metric_one_mod(metric_list =child_followup_metric)
child_followup_metric_avg_table <- child_followup_metric_avg %>%
mutate_if(is.numeric, round, digits=3)%>%
mutate("correlation (sd)" = paste0(correlation," (",cor_sd,")"))%>%
mutate("tradrsq (sd)" = paste0(tradrsq," (",rsq_sd,")"))%>%
mutate("MAE (sd)" = paste0(MAE," (",mae_sd,")"))%>%
mutate("RMSE (sd)" = paste0(RMSE," (",rmse_sd,")"))%>%
select_if(is.character)
child_followup_metric_avg_table%>%
dplyr::select(all_of(avg_table_var_names))%>%
kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in followup")) %>%
kableExtra::kable_classic(full_width = F,
html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.309 (0.074) | 0.081 (0.058) | 0.72 (0.048) | 0.91 (0.053) |
Join the predicted values of three different sets of featues of mental health together.
psy_seperate_baseline <- list(parents=parents_pls_pred_baseline_results %>% do.call(rbind,.),
CBCL=CBCL_pls_pred_baseline_results%>% do.call(rbind,.),
child=child_pls_pred_baseline_results%>% do.call(rbind,.))
psy_seperate_followup <- list(parents=parents_pls_pred_followup_results %>% do.call(rbind,.),
CBCL=CBCL_pls_pred_followup_results%>% do.call(rbind,.),
child=child_pls_pred_followup_results%>% do.call(rbind,.))
baseline_corr_string_vec <- c(parents_baseline_metric_avg_table$`correlation (sd)`,
CBCL_baseline_metric_avg_table$`correlation (sd)`,
child_baseline_metric_avg_table$`correlation (sd)`)
followup_corr_string_vec <- c(parents_followup_metric_avg_table$`correlation (sd)`,
CBCL_followup_metric_avg_table$`correlation (sd)`,
child_followup_metric_avg_table$`correlation (sd)`)
plotting_titles <- c("ASR","CBCL", "Child Personality")
psy_seperate_plot_baseline <- pmap(list( psy_seperate_baseline,plotting_titles,baseline_corr_string_vec),
~scatter_plot_gfactor_string(data_input=..1,
name_input=..2,
corr_string_input = ..3))
title_baseline <- ggdraw() +
draw_label(
"Baseline",
fontface = 'bold',
x = 0,
hjust = 0
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
psy_seperate_plot_baseline_grid <- cowplot::plot_grid(title_baseline,
cowplot::plot_grid(plotlist = psy_seperate_plot_baseline,nrow = 1,ncol = 3),
nrow = 2 ,
rel_heights = c(0.1, 1))
## Warning: Removed 2 rows containing non-finite values (`stat_pointdensity()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 9 rows containing non-finite values (`stat_pointdensity()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 9 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
title_scatter_plot_all <- ggdraw() +
draw_label(
"Out-of-Sample Predictive Ability",
fontface = 'bold',
x = 0,
hjust = 0
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
labelled_scatter_baseline <- ggpubr::annotate_figure(psy_seperate_plot_baseline_grid,
left= ggpubr::text_grob("Observed Cognitive Performance (Z)",size=15,rot=90),
bottom = ggpubr::text_grob("Predicted Cognitive Performance (Z)",size=15))
cowplot::plot_grid(title_scatter_plot_all,labelled_scatter_baseline,
nrow = 2 ,
rel_heights = c(0.1, 1))
psy_seperate_plot_followup <- pmap(list( psy_seperate_followup, plotting_titles,followup_corr_string_vec),
~scatter_plot_gfactor_string(data_input=..1,
name_input=..2,
corr_string_input = ..3))
title_followup <- ggdraw() +
draw_label(
"Followup",
fontface = 'bold',
x = 0,
hjust = 0
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
psy_seperate_plot_followup_grid <- cowplot::plot_grid(title_followup,
cowplot::plot_grid(plotlist = psy_seperate_plot_followup,nrow = 1,ncol = 3),
nrow = 2 ,
rel_heights = c(0.1, 1))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 9 rows containing non-finite values (`stat_pointdensity()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 9 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
labelled_scatter_followup <- ggpubr::annotate_figure(psy_seperate_plot_followup_grid,
left= ggpubr::text_grob("Observed Cognitive Performance (Z)",size=15,rot=90),
bottom = ggpubr::text_grob("Predicted Cognitive Performance (Z)",size=15))
cowplot::plot_grid(title_scatter_plot_all,labelled_scatter_followup,
nrow = 2 ,
rel_heights = c(0.1, 1))
save the metrics performances
psychopathology_baseline_metric_output_table <- psychopathology_baseline_metric_avg_table %>%
mutate(modality = "Mental Health")
psychopathology_followup_metric_output_table <- psychopathology_followup_metric_avg_table %>%
mutate(modality = "Mental Health")
parents_baseline_metric_output_table <- parents_baseline_metric_avg_table %>%
mutate(modality = "ASR")
parents_followup_metric_output_table <- parents_followup_metric_avg_table %>%
mutate(modality = "ASR")
CBCL_baseline_metric_output_table <- CBCL_baseline_metric_avg_table %>%
mutate(modality = "CBCL")
CBCL_followup_metric_output_table<- CBCL_followup_metric_avg_table %>%
mutate(modality = "CBCL")
child_baseline_metric_output_table<- child_baseline_metric_avg_table %>%
mutate(modality = "Child personality")
child_followup_metric_output_table<- child_followup_metric_avg_table %>%
mutate(modality = "Child personality")
baseline_output_table <- bind_rows(psychopathology_baseline_metric_output_table,
CBCL_baseline_metric_output_table,
parents_baseline_metric_output_table,
child_baseline_metric_output_table) %>%
mutate(event = "baseline")
followup_output_table <- bind_rows(psychopathology_followup_metric_output_table,
CBCL_followup_metric_output_table,
parents_followup_metric_output_table,
child_followup_metric_output_table) %>%
mutate(event = "followup")
output_table <- bind_rows(baseline_output_table,followup_output_table)
saveRDS(output_table,paste0(scriptfold,'Common_psy_gene_brain_all/saved_outputs/performance_metrics/mental_health_performance_metric', '.RData'))
save the scatterplot list
saveRDS(psy_seperate_plot_baseline,paste0(scriptfold,'Common_psy_gene_brain_all/saved_outputs/scatter_plots/psy_seperate_plot_baseline', '.RData'))
saveRDS(psy_seperate_plot_followup,paste0(scriptfold,'Common_psy_gene_brain_all/saved_outputs/scatter_plots/psy_seperate_plot_followup', '.RData'))